<?xml version="1.0" encoding="ascii"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
          "DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
  <title>pln_inco.bioscope.scripts</title>
  <link rel="stylesheet" href="epydoc.css" type="text/css" />
  <script type="text/javascript" src="epydoc.js"></script>
</head>

<body bgcolor="white" text="black" link="blue" vlink="#204080"
      alink="#204080">
<!-- ==================== NAVIGATION BAR ==================== -->
<table class="navbar" border="0" width="100%" cellpadding="0"
       bgcolor="#a0c0ff" cellspacing="0">
  <tr valign="middle">
  <!-- Home link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="pln_inco-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Tree link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Index link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Help link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>

      <th class="navbar" width="100%"></th>
  </tr>
</table>
<table width="100%" cellpadding="0" cellspacing="0">
  <tr valign="top">
    <td width="100%">
      <span class="breadcrumbs">
        <a href="pln_inco-module.html">Package&nbsp;pln_inco</a> ::
        <a href="pln_inco.bioscope-module.html">Package&nbsp;bioscope</a> ::
        Module&nbsp;scripts
      </span>
    </td>
    <td>
      <table cellpadding="0" cellspacing="0">
        <!-- hide/show private -->
        <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
    onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
        <tr><td align="right"><span class="options"
            >[<a href="frames.html" target="_top">frames</a
            >]&nbsp;|&nbsp;[<a href="pln_inco.bioscope.scripts-pysrc.html"
            target="_top">no&nbsp;frames</a>]</span></td></tr>
      </table>
    </td>
  </tr>
</table>
<h1 class="epydoc">Source Code for <a href="pln_inco.bioscope.scripts-module.html">Module pln_inco.bioscope.scripts</a></h1>
<pre class="py-src">
<a name="L1"></a><tt class="py-lineno">  1</tt>  <tt class="py-line"><tt class="py-comment"># -*- coding: utf-8 -*- </tt> </tt>
<a name="L2"></a><tt class="py-lineno">  2</tt>  <tt class="py-line"> </tt>
<a name="L3"></a><tt class="py-lineno">  3</tt>  <tt class="py-line"><tt class="py-keyword">import</tt> <tt class="py-name">nltk</tt><tt class="py-op">,</tt><tt class="py-name">nltk</tt><tt class="py-op">.</tt><tt class="py-name">tokenize</tt><tt class="py-op">,</tt><tt class="py-name">xml</tt><tt class="py-op">.</tt><tt class="py-name">etree</tt> </tt>
<a name="L4"></a><tt class="py-lineno">  4</tt>  <tt class="py-line"><tt class="py-keyword">import</tt> <tt class="py-name">os</tt><tt class="py-op">,</tt><tt class="py-name">codecs</tt><tt class="py-op">,</tt><tt class="py-name">fnmatch</tt><tt class="py-op">,</tt><tt class="py-name">re</tt><tt class="py-op">,</tt><tt class="py-name">types</tt><tt class="py-op">,</tt> <tt class="py-name">copy</tt><tt class="py-op">,</tt><tt class="py-name">shutil</tt> </tt>
<a name="L5"></a><tt class="py-lineno">  5</tt>  <tt class="py-line"><tt class="py-keyword">import</tt> <tt class="py-name">pickle</tt> </tt>
<a name="L6"></a><tt class="py-lineno">  6</tt>  <tt class="py-line"><tt class="py-keyword">from</tt> <tt class="py-name">sys</tt> <tt class="py-keyword">import</tt> <tt class="py-op">*</tt> </tt>
<a name="L7"></a><tt class="py-lineno">  7</tt>  <tt class="py-line"><tt class="py-keyword">from</tt> <tt id="link-0" class="py-name" targets="Package pln_inco=pln_inco-module.html"><a title="pln_inco" class="py-name" href="#" onclick="return doclink('link-0', 'pln_inco', 'link-0');">pln_inco</a></tt> <tt class="py-keyword">import</tt> <tt id="link-1" class="py-name" targets="Module pln_inco.graphviz=pln_inco.graphviz-module.html"><a title="pln_inco.graphviz" class="py-name" href="#" onclick="return doclink('link-1', 'graphviz', 'link-1');">graphviz</a></tt><tt class="py-op">,</tt><tt id="link-2" class="py-name" targets="Module pln_inco.penn_treebank=pln_inco.penn_treebank-module.html"><a title="pln_inco.penn_treebank" class="py-name" href="#" onclick="return doclink('link-2', 'penn_treebank', 'link-2');">penn_treebank</a></tt><tt class="py-op">,</tt><tt id="link-3" class="py-name" targets="Module pln_inco.stanford_parser=pln_inco.stanford_parser-module.html"><a title="pln_inco.stanford_parser" class="py-name" href="#" onclick="return doclink('link-3', 'stanford_parser', 'link-3');">stanford_parser</a></tt><tt class="py-op">,</tt><tt id="link-4" class="py-name" targets="Module pln_inco.genia_tagger=pln_inco.genia_tagger-module.html"><a title="pln_inco.genia_tagger" class="py-name" href="#" onclick="return doclink('link-4', 'genia_tagger', 'link-4');">genia_tagger</a></tt> </tt>
<a name="L8"></a><tt class="py-lineno">  8</tt>  <tt class="py-line"><tt class="py-keyword">from</tt> <tt class="py-name">string</tt> <tt class="py-keyword">import</tt> <tt class="py-op">*</tt> </tt>
<a name="L9"></a><tt class="py-lineno">  9</tt>  <tt class="py-line"><tt class="py-keyword">import</tt> <tt id="link-5" class="py-name"><a title="pln_inco" class="py-name" href="#" onclick="return doclink('link-5', 'pln_inco', 'link-0');">pln_inco</a></tt><tt class="py-op">.</tt><tt id="link-6" class="py-name" targets="Package pln_inco.bioscope=pln_inco.bioscope-module.html"><a title="pln_inco.bioscope" class="py-name" href="#" onclick="return doclink('link-6', 'bioscope', 'link-6');">bioscope</a></tt><tt class="py-op">.</tt><tt id="link-7" class="py-name" targets="Module pln_inco.bioscope.util=pln_inco.bioscope.util-module.html,Package pln_inco.util=pln_inco.util-module.html"><a title="pln_inco.bioscope.util
pln_inco.util" class="py-name" href="#" onclick="return doclink('link-7', 'util', 'link-7');">util</a></tt> </tt>
<a name="L10"></a><tt class="py-lineno"> 10</tt>  <tt class="py-line"><tt class="py-keyword">import</tt> <tt class="py-name">time</tt> </tt>
<a name="L11"></a><tt class="py-lineno"> 11</tt>  <tt class="py-line"><tt class="py-keyword">import</tt> <tt class="py-name">sqlite3</tt> </tt>
<a name="L12"></a><tt class="py-lineno"> 12</tt>  <tt class="py-line"><tt class="py-keyword">import</tt> <tt class="py-name">random</tt> </tt>
<a name="L13"></a><tt class="py-lineno"> 13</tt>  <tt class="py-line"> </tt>
<a name="L14"></a><tt class="py-lineno"> 14</tt>  <tt class="py-line"> </tt>
<a name="gen_text_files"></a><div id="gen_text_files-def"><a name="L15"></a><tt class="py-lineno"> 15</tt> <a class="py-toggle" href="#" id="gen_text_files-toggle" onclick="return toggle('gen_text_files');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="pln_inco.bioscope.scripts-module.html#gen_text_files">gen_text_files</a><tt class="py-op">(</tt><tt class="py-param">bcp</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="gen_text_files-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="gen_text_files-expanded"><a name="L16"></a><tt class="py-lineno"> 16</tt>  <tt class="py-line">        <tt class="py-docstring">""" Genera documentos solamente con el texto de las oraciones"""</tt> </tt>
<a name="L17"></a><tt class="py-lineno"> 17</tt>  <tt class="py-line"> </tt>
<a name="L18"></a><tt class="py-lineno"> 18</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-name">docset</tt> <tt class="py-keyword">in</tt> <tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">original_bioscope_corpus</tt><tt class="py-op">.</tt><tt class="py-name">getchildren</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> <tt class="py-comment"># Recorro los document set (en este caso es uno solo)</tt> </tt>
<a name="L19"></a><tt class="py-lineno"> 19</tt>  <tt class="py-line">                <tt class="py-keyword">for</tt> <tt class="py-name">doc</tt> <tt class="py-keyword">in</tt> <tt class="py-name">docset</tt><tt class="py-op">.</tt><tt class="py-name">getchildren</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> <tt class="py-comment"># Recorro los documentos       </tt> </tt>
<a name="L20"></a><tt class="py-lineno"> 20</tt>  <tt class="py-line">                        <tt class="py-comment"># Identificador del documento</tt> </tt>
<a name="L21"></a><tt class="py-lineno"> 21</tt>  <tt class="py-line">                        <tt class="py-name">docId</tt><tt class="py-op">=</tt><tt class="py-name">doc</tt><tt class="py-op">.</tt><tt class="py-name">getchildren</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">]</tt><tt class="py-op">.</tt><tt class="py-name">text</tt>  </tt>
<a name="L22"></a><tt class="py-lineno"> 22</tt>  <tt class="py-line">                        <tt class="py-keyword">print</tt> <tt class="py-op">&gt;&gt;</tt> <tt class="py-name">stderr</tt><tt class="py-op">,</tt> <tt class="py-string">"Proceso archivo "</tt><tt class="py-op">,</tt><tt class="py-name">docId</tt> </tt>
<a name="L23"></a><tt class="py-lineno"> 23</tt>  <tt class="py-line"> </tt>
<a name="L24"></a><tt class="py-lineno"> 24</tt>  <tt class="py-line">                        <tt class="py-comment"># Titulo</tt> </tt>
<a name="L25"></a><tt class="py-lineno"> 25</tt>  <tt class="py-line">                        <tt class="py-name">title</tt><tt class="py-op">=</tt><tt class="py-name">doc</tt><tt class="py-op">.</tt><tt class="py-name">getchildren</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">[</tt><tt class="py-number">1</tt><tt class="py-op">]</tt><tt class="py-op">.</tt><tt class="py-name">getchildren</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">]</tt> </tt>
<a name="L26"></a><tt class="py-lineno"> 26</tt>  <tt class="py-line">                        <tt class="py-name">title_id</tt><tt class="py-op">=</tt><tt class="py-name">doc</tt><tt class="py-op">.</tt><tt class="py-name">getchildren</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">[</tt><tt class="py-number">1</tt><tt class="py-op">]</tt><tt class="py-op">.</tt><tt class="py-name">getchildren</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">]</tt><tt class="py-op">.</tt><tt class="py-name">get</tt><tt class="py-op">(</tt><tt class="py-string">'id'</tt><tt class="py-op">)</tt> </tt>
<a name="L27"></a><tt class="py-lineno"> 27</tt>  <tt class="py-line"> </tt>
<a name="L28"></a><tt class="py-lineno"> 28</tt>  <tt class="py-line">                        <tt class="py-comment"># Ahora proceso las oraciones del texto</tt> </tt>
<a name="L29"></a><tt class="py-lineno"> 29</tt>  <tt class="py-line">                        <tt class="py-name">doc_sentences</tt><tt class="py-op">=</tt><tt class="py-name">doc</tt><tt class="py-op">.</tt><tt class="py-name">getchildren</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">[</tt><tt class="py-number">2</tt><tt class="py-op">]</tt><tt class="py-op">.</tt><tt class="py-name">getchildren</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L30"></a><tt class="py-lineno"> 30</tt>  <tt class="py-line">                        <tt class="py-name">sentences</tt><tt class="py-op">=</tt><tt class="py-op">[</tt><tt class="py-op">(</tt><tt class="py-name">x</tt><tt class="py-op">,</tt><tt id="link-8" class="py-name"><a title="pln_inco" class="py-name" href="#" onclick="return doclink('link-8', 'pln_inco', 'link-0');">pln_inco</a></tt><tt class="py-op">.</tt><tt id="link-9" class="py-name"><a title="pln_inco.bioscope" class="py-name" href="#" onclick="return doclink('link-9', 'bioscope', 'link-6');">bioscope</a></tt><tt class="py-op">.</tt><tt id="link-10" class="py-name"><a title="pln_inco.bioscope.util
pln_inco.util" class="py-name" href="#" onclick="return doclink('link-10', 'util', 'link-7');">util</a></tt><tt class="py-op">.</tt><tt id="link-11" class="py-name" targets="Function pln_inco.bioscope.util.bioscope_get_text()=pln_inco.bioscope.util-module.html#bioscope_get_text"><a title="pln_inco.bioscope.util.bioscope_get_text" class="py-name" href="#" onclick="return doclink('link-11', 'bioscope_get_text', 'link-11');">bioscope_get_text</a></tt><tt class="py-op">(</tt><tt class="py-name">x</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> <tt class="py-keyword">for</tt> <tt class="py-name">x</tt> <tt class="py-keyword">in</tt> <tt class="py-name">doc_sentences</tt><tt class="py-op">]</tt> </tt>
<a name="L31"></a><tt class="py-lineno"> 31</tt>  <tt class="py-line"> </tt>
<a name="L32"></a><tt class="py-lineno"> 32</tt>  <tt class="py-line">                        <tt class="py-comment"># Genero el documento</tt> </tt>
<a name="L33"></a><tt class="py-lineno"> 33</tt>  <tt class="py-line">                        <tt class="py-keyword">print</tt> <tt class="py-op">&gt;&gt;</tt> <tt class="py-name">stderr</tt><tt class="py-op">,</tt> <tt class="py-string">"Genero documento txt..."</tt><tt class="py-op">,</tt><tt class="py-name">docId</tt> </tt>
<a name="L34"></a><tt class="py-lineno"> 34</tt>  <tt class="py-line">                        <tt class="py-name">fileName</tt><tt class="py-op">=</tt><tt class="py-name">os</tt><tt class="py-op">.</tt><tt class="py-name">path</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">txt_dir</tt><tt class="py-op">,</tt><tt class="py-string">'a'</tt><tt class="py-op">+</tt><tt class="py-name">docId</tt><tt class="py-op">+</tt><tt class="py-string">'.txt'</tt><tt class="py-op">)</tt> </tt>
<a name="L35"></a><tt class="py-lineno"> 35</tt>  <tt class="py-line">                        <tt class="py-name">f</tt><tt class="py-op">=</tt><tt class="py-name">codecs</tt><tt class="py-op">.</tt><tt class="py-name">open</tt><tt class="py-op">(</tt><tt class="py-name">fileName</tt><tt class="py-op">,</tt><tt class="py-string">'w'</tt><tt class="py-op">,</tt> <tt class="py-name">encoding</tt><tt class="py-op">=</tt><tt class="py-string">'utf-8'</tt><tt class="py-op">)</tt> </tt>
<a name="L36"></a><tt class="py-lineno"> 36</tt>  <tt class="py-line">                        <tt class="py-name">f</tt><tt class="py-op">.</tt><tt class="py-name">write</tt><tt class="py-op">(</tt><tt id="link-12" class="py-name"><a title="pln_inco" class="py-name" href="#" onclick="return doclink('link-12', 'pln_inco', 'link-0');">pln_inco</a></tt><tt class="py-op">.</tt><tt id="link-13" class="py-name"><a title="pln_inco.bioscope" class="py-name" href="#" onclick="return doclink('link-13', 'bioscope', 'link-6');">bioscope</a></tt><tt class="py-op">.</tt><tt id="link-14" class="py-name"><a title="pln_inco.bioscope.util
pln_inco.util" class="py-name" href="#" onclick="return doclink('link-14', 'util', 'link-7');">util</a></tt><tt class="py-op">.</tt><tt id="link-15" class="py-name"><a title="pln_inco.bioscope.util.bioscope_get_text" class="py-name" href="#" onclick="return doclink('link-15', 'bioscope_get_text', 'link-11');">bioscope_get_text</a></tt><tt class="py-op">(</tt><tt class="py-name">title</tt><tt class="py-op">)</tt><tt class="py-op">+</tt><tt class="py-string">'\n'</tt><tt class="py-op">)</tt> </tt>
<a name="L37"></a><tt class="py-lineno"> 37</tt>  <tt class="py-line">                        <tt class="py-keyword">for</tt> <tt class="py-op">(</tt><tt class="py-name">doc_sentence</tt><tt class="py-op">,</tt><tt class="py-name">sentence_text</tt><tt class="py-op">)</tt> <tt class="py-keyword">in</tt> <tt class="py-name">sentences</tt><tt class="py-op">:</tt> </tt>
<a name="L38"></a><tt class="py-lineno"> 38</tt>  <tt class="py-line">                                <tt class="py-name">f</tt><tt class="py-op">.</tt><tt class="py-name">write</tt><tt class="py-op">(</tt><tt class="py-name">sentence_text</tt><tt class="py-op">+</tt><tt class="py-string">'\n'</tt><tt class="py-op">)</tt> </tt>
<a name="L39"></a><tt class="py-lineno"> 39</tt>  <tt class="py-line">                        <tt class="py-name">f</tt><tt class="py-op">.</tt><tt class="py-name">close</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
</div><a name="L40"></a><tt class="py-lineno"> 40</tt>  <tt class="py-line"> </tt>
<a name="L41"></a><tt class="py-lineno"> 41</tt>  <tt class="py-line">                                 </tt>
<a name="gen_bioscope_files"></a><div id="gen_bioscope_files-def"><a name="L42"></a><tt class="py-lineno"> 42</tt> <a class="py-toggle" href="#" id="gen_bioscope_files-toggle" onclick="return toggle('gen_bioscope_files');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="pln_inco.bioscope.scripts-module.html#gen_bioscope_files">gen_bioscope_files</a><tt class="py-op">(</tt><tt class="py-param">bcp</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="gen_bioscope_files-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="gen_bioscope_files-expanded"><a name="L43"></a><tt class="py-lineno"> 43</tt>  <tt class="py-line">        <tt class="py-docstring">""" </tt> </tt>
<a name="L44"></a><tt class="py-lineno"> 44</tt>  <tt class="py-line"><tt class="py-docstring">        Genera un archivo XML por cada documento del corpus bioscope </tt> </tt>
<a name="L45"></a><tt class="py-lineno"> 45</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L46"></a><tt class="py-lineno"> 46</tt>  <tt class="py-line"> </tt>
<a name="L47"></a><tt class="py-lineno"> 47</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-name">docset</tt> <tt class="py-keyword">in</tt> <tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">original_bioscope_corpus</tt><tt class="py-op">.</tt><tt class="py-name">getchildren</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> <tt class="py-comment"># Recorro los document set (en este caso es uno solo)</tt> </tt>
<a name="L48"></a><tt class="py-lineno"> 48</tt>  <tt class="py-line">                <tt class="py-keyword">for</tt> <tt class="py-name">doc</tt> <tt class="py-keyword">in</tt> <tt class="py-name">docset</tt><tt class="py-op">.</tt><tt class="py-name">getchildren</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> <tt class="py-comment"># Recorro los documentos       </tt> </tt>
<a name="L49"></a><tt class="py-lineno"> 49</tt>  <tt class="py-line">                <tt class="py-comment"># Identificador del documento</tt> </tt>
<a name="L50"></a><tt class="py-lineno"> 50</tt>  <tt class="py-line">                        <tt class="py-name">docId</tt><tt class="py-op">=</tt><tt class="py-name">doc</tt><tt class="py-op">.</tt><tt class="py-name">getchildren</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">]</tt><tt class="py-op">.</tt><tt class="py-name">text</tt>  </tt>
<a name="L51"></a><tt class="py-lineno"> 51</tt>  <tt class="py-line">                        <tt class="py-keyword">print</tt> <tt class="py-op">&gt;&gt;</tt> <tt class="py-name">stderr</tt><tt class="py-op">,</tt> <tt class="py-string">"Proceso archivo "</tt><tt class="py-op">,</tt><tt class="py-name">docId</tt> </tt>
<a name="L52"></a><tt class="py-lineno"> 52</tt>  <tt class="py-line"> </tt>
<a name="L53"></a><tt class="py-lineno"> 53</tt>  <tt class="py-line">                        <tt class="py-comment"># Titulo</tt> </tt>
<a name="L54"></a><tt class="py-lineno"> 54</tt>  <tt class="py-line">                        <tt class="py-name">title</tt><tt class="py-op">=</tt><tt class="py-name">doc</tt><tt class="py-op">.</tt><tt class="py-name">getchildren</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">[</tt><tt class="py-number">1</tt><tt class="py-op">]</tt><tt class="py-op">.</tt><tt class="py-name">getchildren</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">]</tt> </tt>
<a name="L55"></a><tt class="py-lineno"> 55</tt>  <tt class="py-line">                        <tt class="py-name">title_id</tt><tt class="py-op">=</tt><tt class="py-name">doc</tt><tt class="py-op">.</tt><tt class="py-name">getchildren</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">[</tt><tt class="py-number">1</tt><tt class="py-op">]</tt><tt class="py-op">.</tt><tt class="py-name">getchildren</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">]</tt><tt class="py-op">.</tt><tt class="py-name">get</tt><tt class="py-op">(</tt><tt class="py-string">'id'</tt><tt class="py-op">)</tt> </tt>
<a name="L56"></a><tt class="py-lineno"> 56</tt>  <tt class="py-line"> </tt>
<a name="L57"></a><tt class="py-lineno"> 57</tt>  <tt class="py-line">                        <tt class="py-comment"># Ahora proceso las oraciones del texto</tt> </tt>
<a name="L58"></a><tt class="py-lineno"> 58</tt>  <tt class="py-line">                        <tt class="py-name">doc_sentences</tt><tt class="py-op">=</tt><tt class="py-name">doc</tt><tt class="py-op">.</tt><tt class="py-name">getchildren</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">[</tt><tt class="py-number">2</tt><tt class="py-op">]</tt><tt class="py-op">.</tt><tt class="py-name">getchildren</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L59"></a><tt class="py-lineno"> 59</tt>  <tt class="py-line">                        <tt class="py-name">sentences</tt><tt class="py-op">=</tt><tt class="py-op">[</tt><tt class="py-op">(</tt><tt class="py-name">x</tt><tt class="py-op">,</tt><tt id="link-16" class="py-name"><a title="pln_inco" class="py-name" href="#" onclick="return doclink('link-16', 'pln_inco', 'link-0');">pln_inco</a></tt><tt class="py-op">.</tt><tt id="link-17" class="py-name"><a title="pln_inco.bioscope" class="py-name" href="#" onclick="return doclink('link-17', 'bioscope', 'link-6');">bioscope</a></tt><tt class="py-op">.</tt><tt id="link-18" class="py-name"><a title="pln_inco.bioscope.util
pln_inco.util" class="py-name" href="#" onclick="return doclink('link-18', 'util', 'link-7');">util</a></tt><tt class="py-op">.</tt><tt id="link-19" class="py-name"><a title="pln_inco.bioscope.util.bioscope_get_text" class="py-name" href="#" onclick="return doclink('link-19', 'bioscope_get_text', 'link-11');">bioscope_get_text</a></tt><tt class="py-op">(</tt><tt class="py-name">x</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> <tt class="py-keyword">for</tt> <tt class="py-name">x</tt> <tt class="py-keyword">in</tt> <tt class="py-name">doc_sentences</tt><tt class="py-op">]</tt> </tt>
<a name="L60"></a><tt class="py-lineno"> 60</tt>  <tt class="py-line"> </tt>
<a name="L61"></a><tt class="py-lineno"> 61</tt>  <tt class="py-line"> </tt>
<a name="L62"></a><tt class="py-lineno"> 62</tt>  <tt class="py-line">                        <tt class="py-comment"># Generaci&#243;n de las marcas de bioscope</tt> </tt>
<a name="L63"></a><tt class="py-lineno"> 63</tt>  <tt class="py-line">                        <tt class="py-keyword">print</tt> <tt class="py-op">&gt;&gt;</tt> <tt class="py-name">stderr</tt><tt class="py-op">,</tt> <tt class="py-string">"Genero tags de bioscope..."</tt><tt class="py-op">,</tt><tt class="py-name">docId</tt> </tt>
<a name="L64"></a><tt class="py-lineno"> 64</tt>  <tt class="py-line">                        <tt class="py-name">fileName</tt><tt class="py-op">=</tt><tt class="py-name">os</tt><tt class="py-op">.</tt><tt class="py-name">path</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">bioscope_files_dir</tt><tt class="py-op">,</tt><tt class="py-string">'a'</tt><tt class="py-op">+</tt><tt class="py-name">docId</tt><tt class="py-op">+</tt><tt class="py-string">'.bioscope'</tt><tt class="py-op">)</tt> </tt>
<a name="L65"></a><tt class="py-lineno"> 65</tt>  <tt class="py-line">                        <tt class="py-name">f</tt><tt class="py-op">=</tt><tt class="py-name">open</tt><tt class="py-op">(</tt><tt class="py-name">fileName</tt><tt class="py-op">,</tt><tt class="py-string">'w'</tt><tt class="py-op">)</tt> </tt>
<a name="L66"></a><tt class="py-lineno"> 66</tt>  <tt class="py-line">                        <tt class="py-name">f</tt><tt class="py-op">.</tt><tt class="py-name">write</tt><tt class="py-op">(</tt><tt class="py-string">'&lt;?xml version="1.0" encoding="utf-8"?&gt;'</tt><tt class="py-op">)</tt> </tt>
<a name="L67"></a><tt class="py-lineno"> 67</tt>  <tt class="py-line">                        <tt class="py-name">f</tt><tt class="py-op">.</tt><tt class="py-name">write</tt><tt class="py-op">(</tt><tt class="py-string">'&lt;Annotation&gt;'</tt><tt class="py-op">)</tt> </tt>
<a name="L68"></a><tt class="py-lineno"> 68</tt>  <tt class="py-line">                        <tt class="py-name">f</tt><tt class="py-op">.</tt><tt class="py-name">write</tt><tt class="py-op">(</tt><tt class="py-name">strip</tt><tt class="py-op">(</tt><tt class="py-name">xml</tt><tt class="py-op">.</tt><tt class="py-name">etree</tt><tt class="py-op">.</tt><tt class="py-name">ElementTree</tt><tt class="py-op">.</tt><tt class="py-name">tostring</tt><tt class="py-op">(</tt><tt class="py-name">title</tt><tt class="py-op">,</tt><tt class="py-string">'utf-8'</tt><tt class="py-op">)</tt><tt class="py-op">)</tt><tt class="py-op">+</tt><tt class="py-string">'\n'</tt><tt class="py-op">)</tt> </tt>
<a name="L69"></a><tt class="py-lineno"> 69</tt>  <tt class="py-line">                        <tt class="py-keyword">for</tt> <tt class="py-op">(</tt><tt class="py-name">doc_sentence</tt><tt class="py-op">,</tt><tt class="py-name">sentence_text</tt><tt class="py-op">)</tt> <tt class="py-keyword">in</tt> <tt class="py-name">sentences</tt><tt class="py-op">:</tt> </tt>
<a name="L70"></a><tt class="py-lineno"> 70</tt>  <tt class="py-line">                                <tt class="py-name">f</tt><tt class="py-op">.</tt><tt class="py-name">write</tt><tt class="py-op">(</tt><tt class="py-name">strip</tt><tt class="py-op">(</tt><tt class="py-name">xml</tt><tt class="py-op">.</tt><tt class="py-name">etree</tt><tt class="py-op">.</tt><tt class="py-name">ElementTree</tt><tt class="py-op">.</tt><tt class="py-name">tostring</tt><tt class="py-op">(</tt><tt class="py-name">doc_sentence</tt><tt class="py-op">,</tt><tt class="py-string">'utf-8'</tt><tt class="py-op">)</tt><tt class="py-op">)</tt><tt class="py-op">+</tt><tt class="py-string">'\n'</tt><tt class="py-op">)</tt> </tt>
<a name="L71"></a><tt class="py-lineno"> 71</tt>  <tt class="py-line">                        <tt class="py-name">f</tt><tt class="py-op">.</tt><tt class="py-name">write</tt><tt class="py-op">(</tt><tt class="py-string">'&lt;/Annotation&gt;'</tt><tt class="py-op">)</tt> </tt>
<a name="L72"></a><tt class="py-lineno"> 72</tt>  <tt class="py-line">                        <tt class="py-name">f</tt><tt class="py-op">.</tt><tt class="py-name">close</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
</div><a name="L73"></a><tt class="py-lineno"> 73</tt>  <tt class="py-line"> </tt>
<a name="L74"></a><tt class="py-lineno"> 74</tt>  <tt class="py-line"> </tt>
<a name="create_single_text_file"></a><div id="create_single_text_file-def"><a name="L75"></a><tt class="py-lineno"> 75</tt> <a class="py-toggle" href="#" id="create_single_text_file-toggle" onclick="return toggle('create_single_text_file');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="pln_inco.bioscope.scripts-module.html#create_single_text_file">create_single_text_file</a><tt class="py-op">(</tt><tt class="py-param">bcp</tt><tt class="py-op">,</tt><tt class="py-param">pattern</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="create_single_text_file-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="create_single_text_file-expanded"><a name="L76"></a><tt class="py-lineno"> 76</tt>  <tt class="py-line">        <tt class="py-docstring">""" </tt> </tt>
<a name="L77"></a><tt class="py-lineno"> 77</tt>  <tt class="py-line"><tt class="py-docstring">        A partir de los archivos .txt del corpus que cumplen con pattern, genera un archivo &#250;nico. Es para facilitar el proceso de an&#225;lisis con el tagger de GENIA.</tt> </tt>
<a name="L78"></a><tt class="py-lineno"> 78</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L79"></a><tt class="py-lineno"> 79</tt>  <tt class="py-line"> </tt>
<a name="L80"></a><tt class="py-lineno"> 80</tt>  <tt class="py-line">        <tt class="py-name">output</tt><tt class="py-op">=</tt><tt class="py-string">''</tt> </tt>
<a name="L81"></a><tt class="py-lineno"> 81</tt>  <tt class="py-line">        <tt class="py-keyword">print</tt> <tt class="py-op">&gt;&gt;</tt> <tt class="py-name">stderr</tt><tt class="py-op">,</tt> <tt class="py-string">"Genero archivo temporal para genia..."</tt> </tt>
<a name="L82"></a><tt class="py-lineno"> 82</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-name">fileName</tt> <tt class="py-keyword">in</tt> <tt class="py-name">os</tt><tt class="py-op">.</tt><tt class="py-name">listdir</tt><tt class="py-op">(</tt><tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">txt_dir</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L83"></a><tt class="py-lineno"> 83</tt>  <tt class="py-line">                <tt class="py-keyword">if</tt> <tt class="py-name">fnmatch</tt><tt class="py-op">.</tt><tt class="py-name">fnmatch</tt><tt class="py-op">(</tt><tt class="py-name">fileName</tt><tt class="py-op">,</tt><tt class="py-name">pattern</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L84"></a><tt class="py-lineno"> 84</tt>  <tt class="py-line">                        <tt class="py-name">f</tt><tt class="py-op">=</tt><tt class="py-name">open</tt><tt class="py-op">(</tt><tt class="py-name">os</tt><tt class="py-op">.</tt><tt class="py-name">path</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">txt_dir</tt><tt class="py-op">,</tt><tt class="py-name">fileName</tt><tt class="py-op">)</tt><tt class="py-op">,</tt><tt class="py-string">'r'</tt><tt class="py-op">)</tt> </tt>
<a name="L85"></a><tt class="py-lineno"> 85</tt>  <tt class="py-line">                        <tt class="py-name">output</tt><tt class="py-op">=</tt><tt class="py-name">output</tt><tt class="py-op">+</tt><tt class="py-string">'========='</tt><tt class="py-op">+</tt><tt class="py-name">fileName</tt><tt class="py-op">+</tt><tt class="py-string">'=========\n'</tt><tt class="py-op">+</tt><tt class="py-name">f</tt><tt class="py-op">.</tt><tt class="py-name">read</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L86"></a><tt class="py-lineno"> 86</tt>  <tt class="py-line">                        <tt class="py-name">temp</tt><tt class="py-op">=</tt><tt class="py-name">open</tt><tt class="py-op">(</tt><tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">genia_temp_file</tt><tt class="py-op">,</tt><tt class="py-string">"w"</tt><tt class="py-op">)</tt> </tt>
<a name="L87"></a><tt class="py-lineno"> 87</tt>  <tt class="py-line">                        <tt class="py-name">temp</tt><tt class="py-op">.</tt><tt class="py-name">write</tt><tt class="py-op">(</tt><tt class="py-name">output</tt><tt class="py-op">)</tt> </tt>
<a name="L88"></a><tt class="py-lineno"> 88</tt>  <tt class="py-line">                        <tt class="py-name">temp</tt><tt class="py-op">.</tt><tt class="py-name">close</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
</div><a name="L89"></a><tt class="py-lineno"> 89</tt>  <tt class="py-line"> </tt>
<a name="genia_tag"></a><div id="genia_tag-def"><a name="L90"></a><tt class="py-lineno"> 90</tt> <a class="py-toggle" href="#" id="genia_tag-toggle" onclick="return toggle('genia_tag');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="pln_inco.bioscope.scripts-module.html#genia_tag">genia_tag</a><tt class="py-op">(</tt><tt class="py-param">bcp</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="genia_tag-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="genia_tag-expanded"><a name="L91"></a><tt class="py-lineno"> 91</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L92"></a><tt class="py-lineno"> 92</tt>  <tt class="py-line"><tt class="py-docstring">        Procesa el archivo con los textos del corpus, y lo analiza con Genia</tt> </tt>
<a name="L93"></a><tt class="py-lineno"> 93</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L94"></a><tt class="py-lineno"> 94</tt>  <tt class="py-line">        <tt class="py-comment"># Primero tiene que copiar el archivo al home de Genia</tt> </tt>
<a name="L95"></a><tt class="py-lineno"> 95</tt>  <tt class="py-line">        <tt class="py-name">shutil</tt><tt class="py-op">.</tt><tt class="py-name">copy</tt><tt class="py-op">(</tt><tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">genia_temp_file</tt><tt class="py-op">,</tt><tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">genia_home</tt><tt class="py-op">)</tt> </tt>
<a name="L96"></a><tt class="py-lineno"> 96</tt>  <tt class="py-line">        <tt class="py-name">result</tt><tt class="py-op">=</tt><tt id="link-20" class="py-name"><a title="pln_inco.genia_tagger" class="py-name" href="#" onclick="return doclink('link-20', 'genia_tagger', 'link-4');">genia_tagger</a></tt><tt class="py-op">.</tt><tt id="link-21" class="py-name" targets="Function pln_inco.genia_tagger.tag()=pln_inco.genia_tagger-module.html#tag"><a title="pln_inco.genia_tagger.tag" class="py-name" href="#" onclick="return doclink('link-21', 'tag', 'link-21');">tag</a></tt><tt class="py-op">(</tt><tt class="py-name">os</tt><tt class="py-op">.</tt><tt class="py-name">path</tt><tt class="py-op">.</tt><tt class="py-name">split</tt><tt class="py-op">(</tt><tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">genia_temp_file</tt><tt class="py-op">)</tt><tt class="py-op">[</tt><tt class="py-number">1</tt><tt class="py-op">]</tt><tt class="py-op">,</tt><tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">genia_home</tt><tt class="py-op">)</tt> </tt>
<a name="L97"></a><tt class="py-lineno"> 97</tt>  <tt class="py-line">        <tt class="py-name">temp</tt><tt class="py-op">=</tt><tt class="py-name">open</tt><tt class="py-op">(</tt><tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">genia_temp_results_file</tt><tt class="py-op">,</tt><tt class="py-string">"w"</tt><tt class="py-op">)</tt> </tt>
<a name="L98"></a><tt class="py-lineno"> 98</tt>  <tt class="py-line">        <tt class="py-name">temp</tt><tt class="py-op">.</tt><tt class="py-name">write</tt><tt class="py-op">(</tt><tt class="py-name">result</tt><tt class="py-op">)</tt> </tt>
<a name="L99"></a><tt class="py-lineno"> 99</tt>  <tt class="py-line">        <tt class="py-name">temp</tt><tt class="py-op">.</tt><tt class="py-name">close</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
</div><a name="L100"></a><tt class="py-lineno">100</tt>  <tt class="py-line">         </tt>
<a name="L101"></a><tt class="py-lineno">101</tt>  <tt class="py-line"> </tt>
<a name="gen_genia_files"></a><div id="gen_genia_files-def"><a name="L102"></a><tt class="py-lineno">102</tt> <a class="py-toggle" href="#" id="gen_genia_files-toggle" onclick="return toggle('gen_genia_files');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="pln_inco.bioscope.scripts-module.html#gen_genia_files">gen_genia_files</a><tt class="py-op">(</tt><tt class="py-param">bcp</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="gen_genia_files-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="gen_genia_files-expanded"><a name="L103"></a><tt class="py-lineno">103</tt>  <tt class="py-line">        <tt class="py-docstring">""" </tt> </tt>
<a name="L104"></a><tt class="py-lineno">104</tt>  <tt class="py-line"><tt class="py-docstring">        Procesa el archivo resultado del an&#225;lisis de Genia, y genera un archivo para cada documento y oraci&#243;n del corpus. </tt> </tt>
<a name="L105"></a><tt class="py-lineno">105</tt>  <tt class="py-line"><tt class="py-docstring">        </tt> </tt>
<a name="L106"></a><tt class="py-lineno">106</tt>  <tt class="py-line"><tt class="py-docstring">        Lo generado por cada documento es un archivo en formato lema/POS, listo para ser proceado directamente por el Stanford Parser, corrigiendo algunos problemas en la salida </tt> </tt>
<a name="L107"></a><tt class="py-lineno">107</tt>  <tt class="py-line"><tt class="py-docstring">        del tagger de GENIA y cambiando algunos formados para el PennTreeBank.</tt> </tt>
<a name="L108"></a><tt class="py-lineno">108</tt>  <tt class="py-line"><tt class="py-docstring">        </tt> </tt>
<a name="L109"></a><tt class="py-lineno">109</tt>  <tt class="py-line"><tt class="py-docstring">        Tambi&#233;n genera un archivo con los atributos de GENIA (adem&#225;s del POS, NER y chunking), por cada oraci&#243;n, en un formato igual al que larga GENIA</tt> </tt>
<a name="L110"></a><tt class="py-lineno">110</tt>  <tt class="py-line"><tt class="py-docstring">        </tt> </tt>
<a name="L111"></a><tt class="py-lineno">111</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L112"></a><tt class="py-lineno">112</tt>  <tt class="py-line"> </tt>
<a name="L113"></a><tt class="py-lineno">113</tt>  <tt class="py-line">        <tt class="py-comment"># Leo el archivo temporal</tt> </tt>
<a name="L114"></a><tt class="py-lineno">114</tt>  <tt class="py-line">        <tt class="py-name">f</tt><tt class="py-op">=</tt><tt class="py-name">open</tt><tt class="py-op">(</tt><tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">genia_temp_results_file</tt><tt class="py-op">,</tt><tt class="py-string">'r'</tt><tt class="py-op">)</tt> </tt>
<a name="L115"></a><tt class="py-lineno">115</tt>  <tt class="py-line">        <tt class="py-name">lineas</tt><tt class="py-op">=</tt><tt class="py-name">f</tt><tt class="py-op">.</tt><tt class="py-name">readlines</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L116"></a><tt class="py-lineno">116</tt>  <tt class="py-line"> </tt>
<a name="L117"></a><tt class="py-lineno">117</tt>  <tt class="py-line">        <tt class="py-comment"># Proceso las oraciones</tt> </tt>
<a name="L118"></a><tt class="py-lineno">118</tt>  <tt class="py-line">        <tt class="py-name">sentence_id</tt><tt class="py-op">=</tt><tt class="py-name">None</tt> </tt>
<a name="L119"></a><tt class="py-lineno">119</tt>  <tt class="py-line">        <tt class="py-name">s</tt><tt class="py-op">=</tt><tt class="py-string">''</tt> <tt class="py-comment"># Texto generado para la oraci&#243;n</tt> </tt>
<a name="L120"></a><tt class="py-lineno">120</tt>  <tt class="py-line">        <tt class="py-name">s_articulo</tt><tt class="py-op">=</tt><tt class="py-string">''</tt> <tt class="py-comment"># Texto generado para el art&#237;culo</tt> </tt>
<a name="L121"></a><tt class="py-lineno">121</tt>  <tt class="py-line">        <tt class="py-name">position</tt><tt class="py-op">=</tt><tt class="py-number">0</tt> </tt>
<a name="L122"></a><tt class="py-lineno">122</tt>  <tt class="py-line">        <tt class="py-comment"># Indica si tengo que omitir procesar una l&#237;nea porque la junt&#233; con la l&#237;nea anterior</tt> </tt>
<a name="L123"></a><tt class="py-lineno">123</tt>  <tt class="py-line">        <tt class="py-name">saltar_lineas</tt><tt class="py-op">=</tt><tt class="py-number">0</tt> </tt>
<a name="L124"></a><tt class="py-lineno">124</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-name">l</tt> <tt class="py-keyword">in</tt> <tt class="py-name">lineas</tt><tt class="py-op">:</tt> </tt>
<a name="L125"></a><tt class="py-lineno">125</tt>  <tt class="py-line">                <tt class="py-comment">#print &gt;&gt; stderr, "Proceso la linea ",l, "en la posicion ",position</tt> </tt>
<a name="L126"></a><tt class="py-lineno">126</tt>  <tt class="py-line">                <tt class="py-keyword">if</tt> <tt class="py-name">saltar_lineas</tt> <tt class="py-op">&gt;</tt> <tt class="py-number">0</tt><tt class="py-op">:</tt> </tt>
<a name="L127"></a><tt class="py-lineno">127</tt>  <tt class="py-line">                        <tt class="py-name">saltar_lineas</tt> <tt class="py-op">=</tt> <tt class="py-name">saltar_lineas</tt><tt class="py-op">-</tt><tt class="py-number">1</tt> </tt>
<a name="L128"></a><tt class="py-lineno">128</tt>  <tt class="py-line">                <tt class="py-keyword">elif</tt> <tt class="py-name">l</tt><tt class="py-op">.</tt><tt class="py-name">startswith</tt><tt class="py-op">(</tt><tt class="py-string">'========='</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L129"></a><tt class="py-lineno">129</tt>  <tt class="py-line"> </tt>
<a name="L130"></a><tt class="py-lineno">130</tt>  <tt class="py-line">                        <tt class="py-comment"># Obtengo el id del art&#237;culo</tt> </tt>
<a name="L131"></a><tt class="py-lineno">131</tt>  <tt class="py-line">                        <tt class="py-name">l</tt><tt class="py-op">=</tt><tt class="py-name">l</tt><tt class="py-op">.</tt><tt class="py-name">replace</tt><tt class="py-op">(</tt><tt class="py-string">'========='</tt><tt class="py-op">,</tt><tt class="py-string">''</tt><tt class="py-op">)</tt> </tt>
<a name="L132"></a><tt class="py-lineno">132</tt>  <tt class="py-line">                        <tt class="py-name">index</tt><tt class="py-op">=</tt><tt class="py-name">l</tt><tt class="py-op">.</tt><tt class="py-name">find</tt><tt class="py-op">(</tt><tt class="py-string">'\t'</tt><tt class="py-op">)</tt> </tt>
<a name="L133"></a><tt class="py-lineno">133</tt>  <tt class="py-line">                        <tt class="py-name">docId</tt><tt class="py-op">=</tt><tt class="py-name">l</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">:</tt><tt class="py-name">index</tt><tt class="py-op">]</tt><tt class="py-op">.</tt><tt class="py-name">replace</tt><tt class="py-op">(</tt><tt class="py-string">'.txt'</tt><tt class="py-op">,</tt><tt class="py-string">''</tt><tt class="py-op">)</tt> </tt>
<a name="L134"></a><tt class="py-lineno">134</tt>  <tt class="py-line"> </tt>
<a name="L135"></a><tt class="py-lineno">135</tt>  <tt class="py-line">                        <tt class="py-comment"># Obtengo el arbolito xml corrpondiente al art&#237;culo, a partir del .bioscope</tt> </tt>
<a name="L136"></a><tt class="py-lineno">136</tt>  <tt class="py-line">                        <tt class="py-name">bioscope_doc</tt><tt class="py-op">=</tt><tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">bioscope_files_corpus</tt><tt class="py-op">.</tt><tt class="py-name">xml</tt><tt class="py-op">(</tt><tt class="py-name">docId</tt><tt class="py-op">+</tt><tt class="py-string">'.bioscope'</tt><tt class="py-op">)</tt> </tt>
<a name="L137"></a><tt class="py-lineno">137</tt>  <tt class="py-line">                        <tt class="py-name">sentences</tt><tt class="py-op">=</tt><tt class="py-name">bioscope_doc</tt><tt class="py-op">.</tt><tt class="py-name">getchildren</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L138"></a><tt class="py-lineno">138</tt>  <tt class="py-line"> </tt>
<a name="L139"></a><tt class="py-lineno">139</tt>  <tt class="py-line">                        <tt class="py-name">sentence_pos</tt><tt class="py-op">=</tt><tt class="py-number">0</tt> </tt>
<a name="L140"></a><tt class="py-lineno">140</tt>  <tt class="py-line">                        <tt class="py-name">s_articulo</tt><tt class="py-op">=</tt><tt class="py-string">''</tt> </tt>
<a name="L141"></a><tt class="py-lineno">141</tt>  <tt class="py-line">                        <tt class="py-name">after_linea</tt><tt class="py-op">=</tt><tt class="py-name">True</tt> </tt>
<a name="L142"></a><tt class="py-lineno">142</tt>  <tt class="py-line">                <tt class="py-keyword">elif</tt> <tt class="py-name">l</tt><tt class="py-op">==</tt><tt class="py-string">'\n'</tt> <tt class="py-keyword">and</tt> <tt class="py-keyword">not</tt> <tt class="py-name">after_linea</tt><tt class="py-op">:</tt> </tt>
<a name="L143"></a><tt class="py-lineno">143</tt>  <tt class="py-line">                        <tt class="py-comment"># Estoy en un enter separador de oraciones</tt> </tt>
<a name="L144"></a><tt class="py-lineno">144</tt>  <tt class="py-line">                        <tt class="py-comment"># Genero el archivo correspondiente a la oraci&#243;n que ven&#237;a procesando</tt> </tt>
<a name="L145"></a><tt class="py-lineno">145</tt>  <tt class="py-line">                        <tt class="py-name">geniaFileName</tt><tt class="py-op">=</tt><tt class="py-name">docId</tt><tt class="py-op">+</tt><tt class="py-string">'.'</tt><tt class="py-op">+</tt><tt class="py-name">sentence_id</tt><tt class="py-op">+</tt><tt class="py-string">'.genia'</tt> </tt>
<a name="L146"></a><tt class="py-lineno">146</tt>  <tt class="py-line">                        <tt class="py-keyword">print</tt> <tt class="py-op">&gt;&gt;</tt> <tt class="py-name">stderr</tt><tt class="py-op">,</tt> <tt class="py-string">"Genero archivo ..."</tt><tt class="py-op">,</tt> <tt class="py-name">geniaFileName</tt> </tt>
<a name="L147"></a><tt class="py-lineno">147</tt>  <tt class="py-line">                        <tt class="py-name">f</tt><tt class="py-op">=</tt><tt class="py-name">open</tt><tt class="py-op">(</tt><tt class="py-name">os</tt><tt class="py-op">.</tt><tt class="py-name">path</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">genia_files_dir</tt><tt class="py-op">,</tt><tt class="py-name">geniaFileName</tt><tt class="py-op">)</tt><tt class="py-op">,</tt><tt class="py-string">'w+'</tt><tt class="py-op">)</tt> </tt>
<a name="L148"></a><tt class="py-lineno">148</tt>  <tt class="py-line">                        <tt class="py-name">f</tt><tt class="py-op">.</tt><tt class="py-name">write</tt><tt class="py-op">(</tt><tt class="py-name">s</tt><tt class="py-op">)</tt> </tt>
<a name="L149"></a><tt class="py-lineno">149</tt>  <tt class="py-line">                        <tt class="py-name">f</tt><tt class="py-op">.</tt><tt class="py-name">close</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L150"></a><tt class="py-lineno">150</tt>  <tt class="py-line"> </tt>
<a name="L151"></a><tt class="py-lineno">151</tt>  <tt class="py-line">                        <tt class="py-comment"># Inserto un enter en la salida del art&#237;culo</tt> </tt>
<a name="L152"></a><tt class="py-lineno">152</tt>  <tt class="py-line">                        <tt class="py-name">s_articulo</tt><tt class="py-op">+=</tt> <tt class="py-string">'\n'</tt> </tt>
<a name="L153"></a><tt class="py-lineno">153</tt>  <tt class="py-line"> </tt>
<a name="L154"></a><tt class="py-lineno">154</tt>  <tt class="py-line">                        <tt class="py-comment"># Incremento el n&#250;mero de oraci&#243;n</tt> </tt>
<a name="L155"></a><tt class="py-lineno">155</tt>  <tt class="py-line">                        <tt class="py-keyword">try</tt><tt class="py-op">:</tt> </tt>
<a name="L156"></a><tt class="py-lineno">156</tt>  <tt class="py-line">                                <tt class="py-name">sentence_pos</tt> <tt class="py-op">+=</tt> <tt class="py-number">1</tt> </tt>
<a name="L157"></a><tt class="py-lineno">157</tt>  <tt class="py-line">                                <tt class="py-name">sentence_id</tt><tt class="py-op">=</tt><tt class="py-name">sentences</tt><tt class="py-op">[</tt><tt class="py-name">sentence_pos</tt><tt class="py-op">]</tt><tt class="py-op">.</tt><tt class="py-name">get</tt><tt class="py-op">(</tt><tt class="py-string">'id'</tt><tt class="py-op">)</tt>                            </tt>
<a name="L158"></a><tt class="py-lineno">158</tt>  <tt class="py-line">                                <tt class="py-name">s</tt><tt class="py-op">=</tt><tt class="py-string">''</tt>                             </tt>
<a name="L159"></a><tt class="py-lineno">159</tt>  <tt class="py-line">                        <tt class="py-keyword">except</tt> <tt class="py-name">IndexError</tt><tt class="py-op">:</tt> </tt>
<a name="L160"></a><tt class="py-lineno">160</tt>  <tt class="py-line">                                <tt class="py-comment"># Como ya no quedan oraciones, genero el .genia del art&#237;culo</tt> </tt>
<a name="L161"></a><tt class="py-lineno">161</tt>  <tt class="py-line">                                <tt class="py-name">geniaFileName</tt><tt class="py-op">=</tt><tt class="py-name">docId</tt><tt class="py-op">+</tt><tt class="py-string">'.genia'</tt> </tt>
<a name="L162"></a><tt class="py-lineno">162</tt>  <tt class="py-line">                                <tt class="py-keyword">print</tt> <tt class="py-op">&gt;&gt;</tt> <tt class="py-name">stderr</tt><tt class="py-op">,</tt> <tt class="py-string">"Genero archivo ..."</tt><tt class="py-op">,</tt> <tt class="py-name">geniaFileName</tt> </tt>
<a name="L163"></a><tt class="py-lineno">163</tt>  <tt class="py-line">                                <tt class="py-name">f</tt><tt class="py-op">=</tt><tt class="py-name">open</tt><tt class="py-op">(</tt><tt class="py-name">os</tt><tt class="py-op">.</tt><tt class="py-name">path</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">genia_articles_dir</tt><tt class="py-op">,</tt><tt class="py-name">geniaFileName</tt><tt class="py-op">)</tt><tt class="py-op">,</tt><tt class="py-string">'w+'</tt><tt class="py-op">)</tt> </tt>
<a name="L164"></a><tt class="py-lineno">164</tt>  <tt class="py-line">                                <tt class="py-name">f</tt><tt class="py-op">.</tt><tt class="py-name">write</tt><tt class="py-op">(</tt><tt class="py-name">s_articulo</tt><tt class="py-op">)</tt> </tt>
<a name="L165"></a><tt class="py-lineno">165</tt>  <tt class="py-line">                                <tt class="py-name">f</tt><tt class="py-op">.</tt><tt class="py-name">close</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L166"></a><tt class="py-lineno">166</tt>  <tt class="py-line">                                <tt class="py-name">s_articulo</tt><tt class="py-op">=</tt><tt class="py-string">''</tt> </tt>
<a name="L167"></a><tt class="py-lineno">167</tt>  <tt class="py-line"> </tt>
<a name="L168"></a><tt class="py-lineno">168</tt>  <tt class="py-line">                <tt class="py-keyword">elif</tt> <tt class="py-name">l</tt><tt class="py-op">==</tt><tt class="py-string">'\n'</tt><tt class="py-op">:</tt> </tt>
<a name="L169"></a><tt class="py-lineno">169</tt>  <tt class="py-line">                        <tt class="py-comment"># Estoy en el primer enter, incremento el n&#250;mero de oraci&#243;n</tt> </tt>
<a name="L170"></a><tt class="py-lineno">170</tt>  <tt class="py-line">                        <tt class="py-name">after_linea</tt><tt class="py-op">=</tt><tt class="py-name">False</tt> </tt>
<a name="L171"></a><tt class="py-lineno">171</tt>  <tt class="py-line">                        <tt class="py-name">sentence_id</tt><tt class="py-op">=</tt><tt class="py-name">sentences</tt><tt class="py-op">[</tt><tt class="py-name">sentence_pos</tt><tt class="py-op">]</tt><tt class="py-op">.</tt><tt class="py-name">get</tt><tt class="py-op">(</tt><tt class="py-string">'id'</tt><tt class="py-op">)</tt>                            </tt>
<a name="L172"></a><tt class="py-lineno">172</tt>  <tt class="py-line">                        <tt class="py-name">s</tt><tt class="py-op">=</tt><tt class="py-string">''</tt>                             </tt>
<a name="L173"></a><tt class="py-lineno">173</tt>  <tt class="py-line">                <tt class="py-keyword">else</tt><tt class="py-op">:</tt> </tt>
<a name="L174"></a><tt class="py-lineno">174</tt>  <tt class="py-line">                        <tt class="py-comment"># Incremento la oraci&#243;n hasta el momento con la palabra</tt> </tt>
<a name="L175"></a><tt class="py-lineno">175</tt>  <tt class="py-line">                        <tt class="py-comment">#s += l</tt> </tt>
<a name="L176"></a><tt class="py-lineno">176</tt>  <tt class="py-line">                        <tt class="py-comment"># Para el caso de lo generado para el art&#237;culo, proceso la l&#237;nea para que </tt> </tt>
<a name="L177"></a><tt class="py-lineno">177</tt>  <tt class="py-line">                        <tt class="py-comment"># quede pronta para ser utilizada por el parser</tt> </tt>
<a name="L178"></a><tt class="py-lineno">178</tt>  <tt class="py-line">                        <tt class="py-comment"># El formato que genero es palabra/tag, con procesamiento para que quede como el penntreebank</tt> </tt>
<a name="L179"></a><tt class="py-lineno">179</tt>  <tt class="py-line">                        <tt class="py-comment"># Primero me quedo con las columnas 1 y 3</tt> </tt>
<a name="L180"></a><tt class="py-lineno">180</tt>  <tt class="py-line">                        <tt class="py-op">(</tt><tt class="py-name">word</tt><tt class="py-op">,</tt><tt class="py-name">lemma</tt><tt class="py-op">,</tt><tt class="py-name">pos</tt><tt class="py-op">,</tt><tt class="py-name">chunk</tt><tt class="py-op">,</tt><tt class="py-name">ne</tt><tt class="py-op">)</tt><tt class="py-op">=</tt><tt class="py-name">split</tt><tt class="py-op">(</tt><tt class="py-name">l</tt><tt class="py-op">,</tt><tt class="py-string">'\t'</tt><tt class="py-op">)</tt> </tt>
<a name="L181"></a><tt class="py-lineno">181</tt>  <tt class="py-line">                        <tt class="py-comment"># Quito el enter al ne</tt> </tt>
<a name="L182"></a><tt class="py-lineno">182</tt>  <tt class="py-line">                        <tt class="py-name">ne</tt><tt class="py-op">=</tt><tt class="py-name">ne</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">:</tt><tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">ne</tt><tt class="py-op">)</tt><tt class="py-op">-</tt><tt class="py-number">1</tt><tt class="py-op">]</tt> </tt>
<a name="L183"></a><tt class="py-lineno">183</tt>  <tt class="py-line"> </tt>
<a name="L184"></a><tt class="py-lineno">184</tt>  <tt class="py-line">                        <tt class="py-comment">#print &gt;&gt; stderr, "Position vale",position</tt> </tt>
<a name="L185"></a><tt class="py-lineno">185</tt>  <tt class="py-line">                        <tt class="py-comment">#print &gt;&gt; stderr, "Largo de lineas ",len(lineas)</tt> </tt>
<a name="L186"></a><tt class="py-lineno">186</tt>  <tt class="py-line">                        <tt class="py-comment">#print &gt;&gt; stderr, "split(lineas[position+1],'\t') ",split(lineas[position+1],'\t')</tt> </tt>
<a name="L187"></a><tt class="py-lineno">187</tt>  <tt class="py-line">                        <tt class="py-comment">#print &gt;&gt; stderr, "split(lineas[position+2],'\t') ",split(lineas[position+2],'\t')</tt> </tt>
<a name="L188"></a><tt class="py-lineno">188</tt>  <tt class="py-line">                        <tt class="py-comment"># Genia, en la &#250;ltima palabra de la oraci&#243;n, no tokeniza bien</tt> </tt>
<a name="L189"></a><tt class="py-lineno">189</tt>  <tt class="py-line">                        <tt class="py-comment"># Por lo que si la oraci&#243;n termina en un ., hay que generar dos tokens</tt> </tt>
<a name="L190"></a><tt class="py-lineno">190</tt>  <tt class="py-line">                        <tt class="py-keyword">if</tt> <tt class="py-name">pos</tt><tt class="py-op">==</tt><tt class="py-string">'.'</tt> <tt class="py-keyword">and</tt> <tt class="py-name">lemma</tt><tt class="py-op">&lt;&gt;</tt><tt class="py-string">'.'</tt> <tt class="py-keyword">and</tt> <tt class="py-name">word</tt><tt class="py-op">.</tt><tt class="py-name">endswith</tt><tt class="py-op">(</tt><tt class="py-string">'.'</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L191"></a><tt class="py-lineno">191</tt>  <tt class="py-line">                                <tt class="py-comment"># Genia tiene un bug, que hace que en algunos casos si es la &#250;ltima palabra</tt> </tt>
<a name="L192"></a><tt class="py-lineno">192</tt>  <tt class="py-line">                                <tt class="py-comment"># No s&#243;lo la pega con el punto (como hace en general)</tt> </tt>
<a name="L193"></a><tt class="py-lineno">193</tt>  <tt class="py-line">                                <tt class="py-comment"># Sino que adem&#225;s le pone el tag '.'</tt> </tt>
<a name="L194"></a><tt class="py-lineno">194</tt>  <tt class="py-line">                                <tt class="py-comment"># Ver la oraci&#243;n S19.8, del documento a91187647</tt> </tt>
<a name="L195"></a><tt class="py-lineno">195</tt>  <tt class="py-line"> </tt>
<a name="L196"></a><tt class="py-lineno">196</tt>  <tt class="py-line">                                <tt class="py-comment"># Lo que hago es separar en palabra y punto</tt> </tt>
<a name="L197"></a><tt class="py-lineno">197</tt>  <tt class="py-line">                                <tt class="py-comment"># Y ponerle a prepo el tag "NN" a la palabra que genia omiti&#243; clasificar</tt> </tt>
<a name="L198"></a><tt class="py-lineno">198</tt>  <tt class="py-line"> </tt>
<a name="L199"></a><tt class="py-lineno">199</tt>  <tt class="py-line">                                <tt class="py-name">word</tt><tt class="py-op">=</tt><tt class="py-name">word</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">:</tt><tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">word</tt><tt class="py-op">)</tt><tt class="py-op">-</tt><tt class="py-number">1</tt><tt class="py-op">]</tt> </tt>
<a name="L200"></a><tt class="py-lineno">200</tt>  <tt class="py-line">                                <tt class="py-name">lemma</tt><tt class="py-op">=</tt><tt class="py-name">word</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">:</tt><tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">word</tt><tt class="py-op">)</tt><tt class="py-op">-</tt><tt class="py-number">1</tt><tt class="py-op">]</tt> </tt>
<a name="L201"></a><tt class="py-lineno">201</tt>  <tt class="py-line">                                <tt class="py-name">pos</tt><tt class="py-op">=</tt><tt class="py-string">'NN'</tt> </tt>
<a name="L202"></a><tt class="py-lineno">202</tt>  <tt class="py-line">                                <tt class="py-name">chunk</tt><tt class="py-op">=</tt><tt class="py-string">'O'</tt> </tt>
<a name="L203"></a><tt class="py-lineno">203</tt>  <tt class="py-line">                                <tt class="py-name">ne</tt><tt class="py-op">=</tt><tt class="py-string">'O'</tt> </tt>
<a name="L204"></a><tt class="py-lineno">204</tt>  <tt class="py-line"> </tt>
<a name="L205"></a><tt class="py-lineno">205</tt>  <tt class="py-line">                                <tt class="py-name">s_articulo</tt> <tt class="py-op">+=</tt> <tt class="py-string">'/'</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-op">[</tt><tt class="py-name">word</tt><tt class="py-op">,</tt><tt class="py-name">pos</tt><tt class="py-op">]</tt><tt class="py-op">)</tt> </tt>
<a name="L206"></a><tt class="py-lineno">206</tt>  <tt class="py-line">                                <tt class="py-name">s_articulo</tt> <tt class="py-op">+=</tt> <tt class="py-string">' '</tt> </tt>
<a name="L207"></a><tt class="py-lineno">207</tt>  <tt class="py-line">                                <tt class="py-name">s_articulo</tt> <tt class="py-op">+=</tt> <tt class="py-string">'/'</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-op">[</tt><tt class="py-string">'.'</tt><tt class="py-op">,</tt><tt class="py-string">'.'</tt><tt class="py-op">]</tt><tt class="py-op">)</tt> </tt>
<a name="L208"></a><tt class="py-lineno">208</tt>  <tt class="py-line">                                <tt class="py-name">s_articulo</tt> <tt class="py-op">+=</tt> <tt class="py-string">' '</tt> </tt>
<a name="L209"></a><tt class="py-lineno">209</tt>  <tt class="py-line"> </tt>
<a name="L210"></a><tt class="py-lineno">210</tt>  <tt class="py-line"> </tt>
<a name="L211"></a><tt class="py-lineno">211</tt>  <tt class="py-line">                                <tt class="py-name">s</tt> <tt class="py-op">+=</tt> <tt class="py-string">'\t'</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-op">[</tt><tt class="py-name">word</tt><tt class="py-op">,</tt><tt class="py-name">lemma</tt><tt class="py-op">,</tt><tt class="py-name">pos</tt><tt class="py-op">,</tt><tt class="py-name">chunk</tt><tt class="py-op">,</tt><tt class="py-name">ne</tt><tt class="py-op">]</tt><tt class="py-op">)</tt> </tt>
<a name="L212"></a><tt class="py-lineno">212</tt>  <tt class="py-line">                                <tt class="py-name">s</tt> <tt class="py-op">+=</tt> <tt class="py-string">'\n'</tt> </tt>
<a name="L213"></a><tt class="py-lineno">213</tt>  <tt class="py-line">                                <tt class="py-name">s</tt> <tt class="py-op">+=</tt> <tt class="py-string">'\t'</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-op">[</tt><tt class="py-string">'.'</tt><tt class="py-op">,</tt><tt class="py-string">'.'</tt><tt class="py-op">,</tt><tt class="py-string">'.'</tt><tt class="py-op">,</tt><tt class="py-string">'O'</tt><tt class="py-op">,</tt><tt class="py-string">'O'</tt><tt class="py-op">]</tt><tt class="py-op">)</tt> </tt>
<a name="L214"></a><tt class="py-lineno">214</tt>  <tt class="py-line">                                <tt class="py-name">s</tt> <tt class="py-op">+=</tt> <tt class="py-string">'\n'</tt>                                        </tt>
<a name="L215"></a><tt class="py-lineno">215</tt>  <tt class="py-line">                        <tt class="py-keyword">elif</tt> <tt class="py-name">word</tt><tt class="py-op">.</tt><tt class="py-name">endswith</tt><tt class="py-op">(</tt><tt class="py-string">'.'</tt><tt class="py-op">)</tt> <tt class="py-keyword">and</tt> <tt class="py-name">pos</tt><tt class="py-op">&lt;&gt;</tt><tt class="py-string">'.'</tt> <tt class="py-keyword">and</tt> <tt class="py-name">lineas</tt><tt class="py-op">[</tt><tt class="py-name">position</tt><tt class="py-op">+</tt><tt class="py-number">1</tt><tt class="py-op">]</tt><tt class="py-op">==</tt><tt class="py-string">'\n'</tt><tt class="py-op">:</tt> </tt>
<a name="L216"></a><tt class="py-lineno">216</tt>  <tt class="py-line">                                <tt class="py-comment"># Si termina en punto, pero el lemma no es punto, y es la &#250;ltima palabra</tt> </tt>
<a name="L217"></a><tt class="py-lineno">217</tt>  <tt class="py-line">                                <tt class="py-comment"># Genia peg&#65155;&#1635; la &#65155;&#65233;ltima palabra con el punto</tt> </tt>
<a name="L218"></a><tt class="py-lineno">218</tt>  <tt class="py-line">                                <tt class="py-comment"># por lo que los separo en dos</tt> </tt>
<a name="L219"></a><tt class="py-lineno">219</tt>  <tt class="py-line">                                <tt class="py-name">lemma</tt><tt class="py-op">=</tt><tt class="py-name">lemma</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">:</tt><tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">lemma</tt><tt class="py-op">)</tt><tt class="py-op">-</tt><tt class="py-number">1</tt><tt class="py-op">]</tt> </tt>
<a name="L220"></a><tt class="py-lineno">220</tt>  <tt class="py-line">                                <tt class="py-name">word</tt><tt class="py-op">=</tt><tt class="py-name">word</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">:</tt><tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">word</tt><tt class="py-op">)</tt><tt class="py-op">-</tt><tt class="py-number">1</tt><tt class="py-op">]</tt> </tt>
<a name="L221"></a><tt class="py-lineno">221</tt>  <tt class="py-line"> </tt>
<a name="L222"></a><tt class="py-lineno">222</tt>  <tt class="py-line">                                <tt class="py-name">s_articulo</tt> <tt class="py-op">+=</tt> <tt class="py-string">'/'</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-op">[</tt><tt class="py-name">word</tt><tt class="py-op">,</tt><tt class="py-name">pos</tt><tt class="py-op">]</tt><tt class="py-op">)</tt> </tt>
<a name="L223"></a><tt class="py-lineno">223</tt>  <tt class="py-line">                                <tt class="py-name">s_articulo</tt> <tt class="py-op">+=</tt> <tt class="py-string">' '</tt> </tt>
<a name="L224"></a><tt class="py-lineno">224</tt>  <tt class="py-line">                                <tt class="py-name">s_articulo</tt> <tt class="py-op">+=</tt> <tt class="py-string">'/'</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-op">[</tt><tt class="py-string">'.'</tt><tt class="py-op">,</tt><tt class="py-string">'.'</tt><tt class="py-op">]</tt><tt class="py-op">)</tt> </tt>
<a name="L225"></a><tt class="py-lineno">225</tt>  <tt class="py-line">                                <tt class="py-name">s_articulo</tt> <tt class="py-op">+=</tt> <tt class="py-string">' '</tt> </tt>
<a name="L226"></a><tt class="py-lineno">226</tt>  <tt class="py-line"> </tt>
<a name="L227"></a><tt class="py-lineno">227</tt>  <tt class="py-line">                                <tt class="py-name">s</tt> <tt class="py-op">+=</tt> <tt class="py-string">'\t'</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-op">[</tt><tt class="py-name">word</tt><tt class="py-op">,</tt><tt class="py-name">lemma</tt><tt class="py-op">,</tt><tt class="py-name">pos</tt><tt class="py-op">,</tt><tt class="py-name">chunk</tt><tt class="py-op">,</tt><tt class="py-name">ne</tt><tt class="py-op">]</tt><tt class="py-op">)</tt> </tt>
<a name="L228"></a><tt class="py-lineno">228</tt>  <tt class="py-line">                                <tt class="py-name">s</tt> <tt class="py-op">+=</tt> <tt class="py-string">'\n'</tt> </tt>
<a name="L229"></a><tt class="py-lineno">229</tt>  <tt class="py-line">                                <tt class="py-name">s</tt> <tt class="py-op">+=</tt> <tt class="py-string">'\t'</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-op">[</tt><tt class="py-string">'.'</tt><tt class="py-op">,</tt><tt class="py-string">'.'</tt><tt class="py-op">,</tt><tt class="py-string">'.'</tt><tt class="py-op">,</tt><tt class="py-string">'O'</tt><tt class="py-op">,</tt><tt class="py-string">'O'</tt><tt class="py-op">]</tt><tt class="py-op">)</tt> </tt>
<a name="L230"></a><tt class="py-lineno">230</tt>  <tt class="py-line">                                <tt class="py-name">s</tt> <tt class="py-op">+=</tt> <tt class="py-string">'\n'</tt>                                        </tt>
<a name="L231"></a><tt class="py-lineno">231</tt>  <tt class="py-line"> </tt>
<a name="L232"></a><tt class="py-lineno">232</tt>  <tt class="py-line"> </tt>
<a name="L233"></a><tt class="py-lineno">233</tt>  <tt class="py-line">                        <tt class="py-keyword">elif</tt> <tt class="py-name">pos</tt><tt class="py-op">==</tt><tt class="py-string">'CD'</tt> <tt class="py-keyword">and</tt> <tt class="py-name">position</tt><tt class="py-op">&lt;=</tt><tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">lineas</tt><tt class="py-op">)</tt><tt class="py-op">-</tt><tt class="py-number">3</tt> <tt class="py-keyword">and</tt> <tt class="py-name">lineas</tt><tt class="py-op">[</tt><tt class="py-name">position</tt><tt class="py-op">+</tt><tt class="py-number">1</tt><tt class="py-op">]</tt><tt class="py-op">&lt;&gt;</tt><tt class="py-string">'\n'</tt> <tt class="py-keyword">and</tt> <tt class="py-name">lineas</tt><tt class="py-op">[</tt><tt class="py-name">position</tt><tt class="py-op">+</tt><tt class="py-number">2</tt><tt class="py-op">]</tt><tt class="py-op">&lt;&gt;</tt><tt class="py-string">'\n'</tt> <tt class="py-keyword">and</tt> <tt class="py-name">split</tt><tt class="py-op">(</tt><tt class="py-name">lineas</tt><tt class="py-op">[</tt><tt class="py-name">position</tt><tt class="py-op">+</tt><tt class="py-number">1</tt><tt class="py-op">]</tt><tt class="py-op">,</tt><tt class="py-string">'\t'</tt><tt class="py-op">)</tt><tt class="py-op">[</tt><tt class="py-number">2</tt><tt class="py-op">]</tt><tt class="py-op">==</tt><tt class="py-string">','</tt> <tt class="py-keyword">and</tt> <tt class="py-name">split</tt><tt class="py-op">(</tt><tt class="py-name">lineas</tt><tt class="py-op">[</tt><tt class="py-name">position</tt><tt class="py-op">+</tt><tt class="py-number">2</tt><tt class="py-op">]</tt><tt class="py-op">,</tt><tt class="py-string">'\t'</tt><tt class="py-op">)</tt><tt class="py-op">[</tt><tt class="py-number">2</tt><tt class="py-op">]</tt><tt class="py-op">==</tt><tt class="py-string">'CD'</tt><tt class="py-op">:</tt> </tt>
<a name="L234"></a><tt class="py-lineno">234</tt>  <tt class="py-line">                                <tt class="py-comment"># Si estoy en una situacion de numero,numero, genia lo separa err&#243;neamente</tt> </tt>
<a name="L235"></a><tt class="py-lineno">235</tt>  <tt class="py-line">                                <tt class="py-comment"># tengo que juntarlas</tt> </tt>
<a name="L236"></a><tt class="py-lineno">236</tt>  <tt class="py-line">                                <tt class="py-comment"># el pos, chunk y el ne quedan como el primero</tt> </tt>
<a name="L237"></a><tt class="py-lineno">237</tt>  <tt class="py-line">                                <tt class="py-comment"># tengo luego que saltarme las dos lineas siguientes</tt> </tt>
<a name="L238"></a><tt class="py-lineno">238</tt>  <tt class="py-line">                                <tt class="py-name">siguiente_numero</tt><tt class="py-op">=</tt><tt class="py-name">split</tt><tt class="py-op">(</tt><tt class="py-name">lineas</tt><tt class="py-op">[</tt><tt class="py-name">position</tt><tt class="py-op">+</tt><tt class="py-number">2</tt><tt class="py-op">]</tt><tt class="py-op">,</tt><tt class="py-string">'\t'</tt><tt class="py-op">)</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">]</tt> </tt>
<a name="L239"></a><tt class="py-lineno">239</tt>  <tt class="py-line">                                <tt class="py-name">word</tt><tt class="py-op">=</tt><tt class="py-name">word</tt><tt class="py-op">+</tt><tt class="py-string">','</tt><tt class="py-op">+</tt><tt class="py-name">siguiente_numero</tt> </tt>
<a name="L240"></a><tt class="py-lineno">240</tt>  <tt class="py-line">                                <tt class="py-name">lemma</tt><tt class="py-op">=</tt><tt class="py-name">lemma</tt><tt class="py-op">+</tt><tt class="py-string">','</tt><tt class="py-op">+</tt><tt class="py-name">siguiente_numero</tt> </tt>
<a name="L241"></a><tt class="py-lineno">241</tt>  <tt class="py-line">                                <tt class="py-name">s_articulo</tt> <tt class="py-op">+=</tt><tt class="py-string">'/'</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-op">[</tt><tt id="link-22" class="py-name"><a title="pln_inco.penn_treebank" class="py-name" href="#" onclick="return doclink('link-22', 'penn_treebank', 'link-2');">penn_treebank</a></tt><tt class="py-op">.</tt><tt id="link-23" class="py-name" targets="Function pln_inco.penn_treebank.ptb_conversion_word()=pln_inco.penn_treebank-module.html#ptb_conversion_word"><a title="pln_inco.penn_treebank.ptb_conversion_word" class="py-name" href="#" onclick="return doclink('link-23', 'ptb_conversion_word', 'link-23');">ptb_conversion_word</a></tt><tt class="py-op">(</tt><tt class="py-name">word</tt><tt class="py-op">)</tt><tt class="py-op">,</tt><tt id="link-24" class="py-name"><a title="pln_inco.penn_treebank" class="py-name" href="#" onclick="return doclink('link-24', 'penn_treebank', 'link-2');">penn_treebank</a></tt><tt class="py-op">.</tt><tt id="link-25" class="py-name" targets="Function pln_inco.penn_treebank.ptb_conversion_pos()=pln_inco.penn_treebank-module.html#ptb_conversion_pos"><a title="pln_inco.penn_treebank.ptb_conversion_pos" class="py-name" href="#" onclick="return doclink('link-25', 'ptb_conversion_pos', 'link-25');">ptb_conversion_pos</a></tt><tt class="py-op">(</tt><tt class="py-name">pos</tt><tt class="py-op">)</tt><tt class="py-op">]</tt><tt class="py-op">)</tt> </tt>
<a name="L242"></a><tt class="py-lineno">242</tt>  <tt class="py-line">                                <tt class="py-name">s_articulo</tt> <tt class="py-op">+=</tt> <tt class="py-string">' '</tt> </tt>
<a name="L243"></a><tt class="py-lineno">243</tt>  <tt class="py-line"> </tt>
<a name="L244"></a><tt class="py-lineno">244</tt>  <tt class="py-line">                                <tt class="py-name">s</tt> <tt class="py-op">+=</tt> <tt class="py-string">'\t'</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-op">[</tt><tt id="link-26" class="py-name"><a title="pln_inco.penn_treebank" class="py-name" href="#" onclick="return doclink('link-26', 'penn_treebank', 'link-2');">penn_treebank</a></tt><tt class="py-op">.</tt><tt id="link-27" class="py-name"><a title="pln_inco.penn_treebank.ptb_conversion_word" class="py-name" href="#" onclick="return doclink('link-27', 'ptb_conversion_word', 'link-23');">ptb_conversion_word</a></tt><tt class="py-op">(</tt><tt class="py-name">word</tt><tt class="py-op">)</tt><tt class="py-op">,</tt><tt class="py-name">lemma</tt><tt class="py-op">,</tt><tt id="link-28" class="py-name"><a title="pln_inco.penn_treebank" class="py-name" href="#" onclick="return doclink('link-28', 'penn_treebank', 'link-2');">penn_treebank</a></tt><tt class="py-op">.</tt><tt id="link-29" class="py-name"><a title="pln_inco.penn_treebank.ptb_conversion_pos" class="py-name" href="#" onclick="return doclink('link-29', 'ptb_conversion_pos', 'link-25');">ptb_conversion_pos</a></tt><tt class="py-op">(</tt><tt class="py-name">pos</tt><tt class="py-op">)</tt><tt class="py-op">,</tt><tt class="py-name">chunk</tt><tt class="py-op">,</tt><tt class="py-name">ne</tt><tt class="py-op">]</tt><tt class="py-op">)</tt> </tt>
<a name="L245"></a><tt class="py-lineno">245</tt>  <tt class="py-line">                                <tt class="py-name">s</tt> <tt class="py-op">+=</tt><tt class="py-string">'\n'</tt> </tt>
<a name="L246"></a><tt class="py-lineno">246</tt>  <tt class="py-line">                                <tt class="py-name">saltar_lineas</tt><tt class="py-op">=</tt><tt class="py-number">2</tt>                                  </tt>
<a name="L247"></a><tt class="py-lineno">247</tt>  <tt class="py-line">                        <tt class="py-keyword">else</tt><tt class="py-op">:</tt>                                    </tt>
<a name="L248"></a><tt class="py-lineno">248</tt>  <tt class="py-line">                                <tt class="py-comment"># Caso general</tt> </tt>
<a name="L249"></a><tt class="py-lineno">249</tt>  <tt class="py-line">                                <tt class="py-name">s_articulo</tt> <tt class="py-op">+=</tt><tt class="py-string">'/'</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-op">[</tt><tt id="link-30" class="py-name"><a title="pln_inco.penn_treebank" class="py-name" href="#" onclick="return doclink('link-30', 'penn_treebank', 'link-2');">penn_treebank</a></tt><tt class="py-op">.</tt><tt id="link-31" class="py-name"><a title="pln_inco.penn_treebank.ptb_conversion_word" class="py-name" href="#" onclick="return doclink('link-31', 'ptb_conversion_word', 'link-23');">ptb_conversion_word</a></tt><tt class="py-op">(</tt><tt class="py-name">word</tt><tt class="py-op">)</tt><tt class="py-op">,</tt><tt id="link-32" class="py-name"><a title="pln_inco.penn_treebank" class="py-name" href="#" onclick="return doclink('link-32', 'penn_treebank', 'link-2');">penn_treebank</a></tt><tt class="py-op">.</tt><tt id="link-33" class="py-name"><a title="pln_inco.penn_treebank.ptb_conversion_pos" class="py-name" href="#" onclick="return doclink('link-33', 'ptb_conversion_pos', 'link-25');">ptb_conversion_pos</a></tt><tt class="py-op">(</tt><tt class="py-name">pos</tt><tt class="py-op">)</tt><tt class="py-op">]</tt><tt class="py-op">)</tt> </tt>
<a name="L250"></a><tt class="py-lineno">250</tt>  <tt class="py-line">                                <tt class="py-name">s_articulo</tt> <tt class="py-op">+=</tt> <tt class="py-string">' '</tt> </tt>
<a name="L251"></a><tt class="py-lineno">251</tt>  <tt class="py-line"> </tt>
<a name="L252"></a><tt class="py-lineno">252</tt>  <tt class="py-line">                                <tt class="py-name">s</tt> <tt class="py-op">+=</tt> <tt class="py-string">'\t'</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-op">[</tt><tt id="link-34" class="py-name"><a title="pln_inco.penn_treebank" class="py-name" href="#" onclick="return doclink('link-34', 'penn_treebank', 'link-2');">penn_treebank</a></tt><tt class="py-op">.</tt><tt id="link-35" class="py-name"><a title="pln_inco.penn_treebank.ptb_conversion_word" class="py-name" href="#" onclick="return doclink('link-35', 'ptb_conversion_word', 'link-23');">ptb_conversion_word</a></tt><tt class="py-op">(</tt><tt class="py-name">word</tt><tt class="py-op">)</tt><tt class="py-op">,</tt><tt class="py-name">lemma</tt><tt class="py-op">,</tt><tt id="link-36" class="py-name"><a title="pln_inco.penn_treebank" class="py-name" href="#" onclick="return doclink('link-36', 'penn_treebank', 'link-2');">penn_treebank</a></tt><tt class="py-op">.</tt><tt id="link-37" class="py-name"><a title="pln_inco.penn_treebank.ptb_conversion_pos" class="py-name" href="#" onclick="return doclink('link-37', 'ptb_conversion_pos', 'link-25');">ptb_conversion_pos</a></tt><tt class="py-op">(</tt><tt class="py-name">pos</tt><tt class="py-op">)</tt><tt class="py-op">,</tt><tt class="py-name">chunk</tt><tt class="py-op">,</tt><tt class="py-name">ne</tt><tt class="py-op">]</tt><tt class="py-op">)</tt> </tt>
<a name="L253"></a><tt class="py-lineno">253</tt>  <tt class="py-line">                                <tt class="py-name">s</tt> <tt class="py-op">+=</tt><tt class="py-string">'\n'</tt> </tt>
<a name="L254"></a><tt class="py-lineno">254</tt>  <tt class="py-line">                <tt class="py-name">position</tt><tt class="py-op">=</tt><tt class="py-name">position</tt><tt class="py-op">+</tt><tt class="py-number">1</tt> </tt>
</div><a name="L255"></a><tt class="py-lineno">255</tt>  <tt class="py-line"> </tt>
<a name="L256"></a><tt class="py-lineno">256</tt>  <tt class="py-line"> </tt>
<a name="gen_parsed_files"></a><div id="gen_parsed_files-def"><a name="L257"></a><tt class="py-lineno">257</tt> <a class="py-toggle" href="#" id="gen_parsed_files-toggle" onclick="return toggle('gen_parsed_files');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="pln_inco.bioscope.scripts-module.html#gen_parsed_files">gen_parsed_files</a><tt class="py-op">(</tt><tt class="py-param">pattern</tt><tt class="py-op">,</tt><tt class="py-param">bcp</tt><tt class="py-op">,</tt><tt class="py-param">regenerate</tt><tt class="py-op">=</tt><tt class="py-name">True</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="gen_parsed_files-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="gen_parsed_files-expanded"><a name="L258"></a><tt class="py-lineno">258</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L259"></a><tt class="py-lineno">259</tt>  <tt class="py-line"><tt class="py-docstring">        Ejecuta el parser de stanford para los archivos con extensi&#243;n .genia que cumplan con el patr&#243;n. Guarda los resultados en archivos .parsed</tt> </tt>
<a name="L260"></a><tt class="py-lineno">260</tt>  <tt class="py-line"><tt class="py-docstring">        @arg pattern: patr&#243;n que indica los documentos a parsear</tt> </tt>
<a name="L261"></a><tt class="py-lineno">261</tt>  <tt class="py-line"><tt class="py-docstring">        @arg regenerate: indica si se deben regenerar archivos ya generados</tt> </tt>
<a name="L262"></a><tt class="py-lineno">262</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L263"></a><tt class="py-lineno">263</tt>  <tt class="py-line"> </tt>
<a name="L264"></a><tt class="py-lineno">264</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-name">fileName</tt> <tt class="py-keyword">in</tt> <tt class="py-name">os</tt><tt class="py-op">.</tt><tt class="py-name">listdir</tt><tt class="py-op">(</tt><tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">genia_articles_dir</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L265"></a><tt class="py-lineno">265</tt>  <tt class="py-line">                <tt class="py-keyword">if</tt> <tt class="py-name">fnmatch</tt><tt class="py-op">.</tt><tt class="py-name">fnmatch</tt><tt class="py-op">(</tt><tt class="py-name">fileName</tt><tt class="py-op">,</tt><tt class="py-name">pattern</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L266"></a><tt class="py-lineno">266</tt>  <tt class="py-line">                         </tt>
<a name="L267"></a><tt class="py-lineno">267</tt>  <tt class="py-line">                        <tt class="py-keyword">print</tt> <tt class="py-string">'Pruebo si existe:'</tt><tt class="py-op">,</tt><tt class="py-name">os</tt><tt class="py-op">.</tt><tt class="py-name">path</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">parsed_files_dir</tt><tt class="py-op">,</tt><tt class="py-name">fileName</tt><tt class="py-op">.</tt><tt class="py-name">replace</tt><tt class="py-op">(</tt><tt class="py-string">'.genia'</tt><tt class="py-op">,</tt><tt class="py-string">'.parsed'</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L268"></a><tt class="py-lineno">268</tt>  <tt class="py-line">                        <tt class="py-keyword">if</tt> <tt class="py-keyword">not</tt> <tt class="py-name">os</tt><tt class="py-op">.</tt><tt class="py-name">path</tt><tt class="py-op">.</tt><tt class="py-name">exists</tt><tt class="py-op">(</tt><tt class="py-name">os</tt><tt class="py-op">.</tt><tt class="py-name">path</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">parsed_files_dir</tt><tt class="py-op">,</tt><tt class="py-name">fileName</tt><tt class="py-op">.</tt><tt class="py-name">replace</tt><tt class="py-op">(</tt><tt class="py-string">'.genia'</tt><tt class="py-op">,</tt><tt class="py-string">'.parsed'</tt><tt class="py-op">)</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> <tt class="py-keyword">or</tt> <tt class="py-name">regenerate</tt><tt class="py-op">:</tt> </tt>
<a name="L269"></a><tt class="py-lineno">269</tt>  <tt class="py-line">                                <tt class="py-keyword">print</tt><tt class="py-op">&gt;&gt;</tt><tt class="py-name">stderr</tt><tt class="py-op">,</tt>  <tt class="py-string">'Parseo...:'</tt><tt class="py-op">+</tt><tt class="py-name">fileName</tt> </tt>
<a name="L270"></a><tt class="py-lineno">270</tt>  <tt class="py-line">                                <tt class="py-name">source</tt><tt class="py-op">=</tt><tt class="py-name">open</tt><tt class="py-op">(</tt><tt class="py-name">os</tt><tt class="py-op">.</tt><tt class="py-name">path</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">genia_articles_dir</tt><tt class="py-op">,</tt><tt class="py-name">fileName</tt><tt class="py-op">)</tt><tt class="py-op">,</tt><tt class="py-string">'r'</tt><tt class="py-op">)</tt> </tt>
<a name="L271"></a><tt class="py-lineno">271</tt>  <tt class="py-line">                                <tt class="py-name">entrada</tt><tt class="py-op">=</tt><tt class="py-name">source</tt><tt class="py-op">.</tt><tt class="py-name">read</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L272"></a><tt class="py-lineno">272</tt>  <tt class="py-line">                                <tt class="py-name">result</tt><tt class="py-op">=</tt><tt id="link-38" class="py-name"><a title="pln_inco.stanford_parser" class="py-name" href="#" onclick="return doclink('link-38', 'stanford_parser', 'link-3');">stanford_parser</a></tt><tt class="py-op">.</tt><tt id="link-39" class="py-name" targets="Function pln_inco.stanford_parser.parse()=pln_inco.stanford_parser-module.html#parse"><a title="pln_inco.stanford_parser.parse" class="py-name" href="#" onclick="return doclink('link-39', 'parse', 'link-39');">parse</a></tt><tt class="py-op">(</tt><tt class="py-name">os</tt><tt class="py-op">.</tt><tt class="py-name">path</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">genia_articles_dir</tt><tt class="py-op">,</tt><tt class="py-name">fileName</tt><tt class="py-op">)</tt><tt class="py-op">,</tt><tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">parser_grammar_file</tt><tt class="py-op">)</tt> </tt>
<a name="L273"></a><tt class="py-lineno">273</tt>  <tt class="py-line">                                <tt class="py-name">dest</tt><tt class="py-op">=</tt><tt class="py-name">open</tt><tt class="py-op">(</tt><tt class="py-name">os</tt><tt class="py-op">.</tt><tt class="py-name">path</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">parsed_files_dir</tt><tt class="py-op">,</tt><tt class="py-name">fileName</tt><tt class="py-op">.</tt><tt class="py-name">replace</tt><tt class="py-op">(</tt><tt class="py-string">'.genia'</tt><tt class="py-op">,</tt><tt class="py-string">'.parsed'</tt><tt class="py-op">)</tt><tt class="py-op">)</tt><tt class="py-op">,</tt><tt class="py-string">'w'</tt><tt class="py-op">)</tt> </tt>
<a name="L274"></a><tt class="py-lineno">274</tt>  <tt class="py-line">                                <tt class="py-name">dest</tt><tt class="py-op">.</tt><tt class="py-name">write</tt><tt class="py-op">(</tt><tt class="py-name">result</tt><tt class="py-op">)</tt> </tt>
<a name="L275"></a><tt class="py-lineno">275</tt>  <tt class="py-line">                                <tt class="py-name">source</tt><tt class="py-op">.</tt><tt class="py-name">close</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L276"></a><tt class="py-lineno">276</tt>  <tt class="py-line">                                <tt class="py-name">dest</tt><tt class="py-op">.</tt><tt class="py-name">close</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
</div><a name="L277"></a><tt class="py-lineno">277</tt>  <tt class="py-line"> </tt>
<a name="copy_genia_event_files"></a><div id="copy_genia_event_files-def"><a name="L278"></a><tt class="py-lineno">278</tt> <a class="py-toggle" href="#" id="copy_genia_event_files-toggle" onclick="return toggle('copy_genia_event_files');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="pln_inco.bioscope.scripts-module.html#copy_genia_event_files">copy_genia_event_files</a><tt class="py-op">(</tt><tt class="py-param">bcp</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="copy_genia_event_files-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="copy_genia_event_files-expanded"><a name="L279"></a><tt class="py-lineno">279</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L280"></a><tt class="py-lineno">280</tt>  <tt class="py-line"><tt class="py-docstring">        Copia los archivos de genia event correspondientes al corpus que estoy procesando. </tt> </tt>
<a name="L281"></a><tt class="py-lineno">281</tt>  <tt class="py-line"><tt class="py-docstring">        Recorre el corpus y va copiando los documentos correspondientes, seg&#250;n su nombre</tt> </tt>
<a name="L282"></a><tt class="py-lineno">282</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L283"></a><tt class="py-lineno">283</tt>  <tt class="py-line">         </tt>
<a name="L284"></a><tt class="py-lineno">284</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-name">docset</tt> <tt class="py-keyword">in</tt> <tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">original_bioscope_corpus</tt><tt class="py-op">.</tt><tt class="py-name">getchildren</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> <tt class="py-comment"># Recorro los document set (en este caso es uno solo)</tt> </tt>
<a name="L285"></a><tt class="py-lineno">285</tt>  <tt class="py-line">                <tt class="py-keyword">for</tt> <tt class="py-name">doc</tt> <tt class="py-keyword">in</tt> <tt class="py-name">docset</tt><tt class="py-op">.</tt><tt class="py-name">getchildren</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> <tt class="py-comment"># Recorro los documentos       </tt> </tt>
<a name="L286"></a><tt class="py-lineno">286</tt>  <tt class="py-line">                        <tt class="py-comment"># Identificador del documento</tt> </tt>
<a name="L287"></a><tt class="py-lineno">287</tt>  <tt class="py-line">                        <tt class="py-name">docId</tt><tt class="py-op">=</tt><tt class="py-name">doc</tt><tt class="py-op">.</tt><tt class="py-name">getchildren</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">]</tt><tt class="py-op">.</tt><tt class="py-name">text</tt>  </tt>
<a name="L288"></a><tt class="py-lineno">288</tt>  <tt class="py-line">                        <tt class="py-keyword">print</tt> <tt class="py-op">&gt;&gt;</tt> <tt class="py-name">stderr</tt><tt class="py-op">,</tt> <tt class="py-string">"Proceso archivo "</tt><tt class="py-op">,</tt><tt class="py-name">docId</tt> </tt>
<a name="L289"></a><tt class="py-lineno">289</tt>  <tt class="py-line"> </tt>
<a name="L290"></a><tt class="py-lineno">290</tt>  <tt class="py-line">                        <tt class="py-comment"># En vez de copiarlo, lo abro y lo grabo.</tt> </tt>
<a name="L291"></a><tt class="py-lineno">291</tt>  <tt class="py-line">                        <tt class="py-comment"># Esto me permite ponerle el css en la copia</tt> </tt>
<a name="L292"></a><tt class="py-lineno">292</tt>  <tt class="py-line">                        <tt class="py-keyword">try</tt><tt class="py-op">:</tt> </tt>
<a name="L293"></a><tt class="py-lineno">293</tt>  <tt class="py-line">                                <tt class="py-name">source</tt><tt class="py-op">=</tt><tt class="py-name">open</tt><tt class="py-op">(</tt><tt class="py-name">os</tt><tt class="py-op">.</tt><tt class="py-name">path</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">genia_event_corpus_dir</tt><tt class="py-op">,</tt><tt class="py-name">docId</tt><tt class="py-op">+</tt><tt class="py-string">'.xml'</tt><tt class="py-op">)</tt><tt class="py-op">,</tt><tt class="py-string">'r'</tt><tt class="py-op">)</tt> </tt>
<a name="L294"></a><tt class="py-lineno">294</tt>  <tt class="py-line">                                <tt class="py-name">dest</tt><tt class="py-op">=</tt><tt class="py-name">open</tt><tt class="py-op">(</tt><tt class="py-name">os</tt><tt class="py-op">.</tt><tt class="py-name">path</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">event_dir</tt><tt class="py-op">,</tt><tt class="py-string">'a'</tt><tt class="py-op">+</tt><tt class="py-name">docId</tt><tt class="py-op">+</tt><tt class="py-string">'.event.xml'</tt><tt class="py-op">)</tt><tt class="py-op">,</tt><tt class="py-string">'w+'</tt><tt class="py-op">)</tt> </tt>
<a name="L295"></a><tt class="py-lineno">295</tt>  <tt class="py-line">                                <tt class="py-name">lineas</tt><tt class="py-op">=</tt><tt class="py-name">source</tt><tt class="py-op">.</tt><tt class="py-name">readlines</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L296"></a><tt class="py-lineno">296</tt>  <tt class="py-line">                                <tt class="py-name">lineas</tt><tt class="py-op">.</tt><tt class="py-name">insert</tt><tt class="py-op">(</tt><tt class="py-number">1</tt><tt class="py-op">,</tt><tt class="py-string">'&lt;?xml-stylesheet href="genia_event.css" type="text/css"?&gt;'</tt><tt class="py-op">)</tt> </tt>
<a name="L297"></a><tt class="py-lineno">297</tt>  <tt class="py-line">                                <tt class="py-name">dest</tt><tt class="py-op">.</tt><tt class="py-name">writelines</tt><tt class="py-op">(</tt><tt class="py-name">lineas</tt><tt class="py-op">)</tt> </tt>
<a name="L298"></a><tt class="py-lineno">298</tt>  <tt class="py-line">                                <tt class="py-name">source</tt><tt class="py-op">.</tt><tt class="py-name">close</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L299"></a><tt class="py-lineno">299</tt>  <tt class="py-line">                                <tt class="py-name">dest</tt><tt class="py-op">.</tt><tt class="py-name">close</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L300"></a><tt class="py-lineno">300</tt>  <tt class="py-line">                        <tt class="py-keyword">except</tt> <tt class="py-name">IOError</tt><tt class="py-op">:</tt> </tt>
<a name="L301"></a><tt class="py-lineno">301</tt>  <tt class="py-line">                                <tt class="py-keyword">print</tt> <tt class="py-op">&gt;&gt;</tt><tt class="py-name">stderr</tt><tt class="py-op">,</tt> <tt class="py-string">'No existe el archivo '</tt><tt class="py-op">+</tt> <tt class="py-name">docId</tt><tt class="py-op">+</tt><tt class="py-string">'.xml'</tt> </tt>
</div><a name="L302"></a><tt class="py-lineno">302</tt>  <tt class="py-line"> </tt>
<a name="L303"></a><tt class="py-lineno">303</tt>  <tt class="py-line"> </tt>
<a name="L304"></a><tt class="py-lineno">304</tt>  <tt class="py-line"> </tt>
<a name="draw_sentences"></a><div id="draw_sentences-def"><a name="L305"></a><tt class="py-lineno">305</tt> <a class="py-toggle" href="#" id="draw_sentences-toggle" onclick="return toggle('draw_sentences');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="pln_inco.bioscope.scripts-module.html#draw_sentences">draw_sentences</a><tt class="py-op">(</tt><tt class="py-param">bc</tt><tt class="py-op">,</tt><tt class="py-param">bcp</tt><tt class="py-op">,</tt><tt class="py-param">only_hedge_and_negation_sentences</tt><tt class="py-op">=</tt><tt class="py-name">True</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="draw_sentences-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="draw_sentences-expanded"><a name="L306"></a><tt class="py-lineno">306</tt>  <tt class="py-line">                <tt class="py-docstring">""" </tt> </tt>
<a name="L307"></a><tt class="py-lineno">307</tt>  <tt class="py-line"><tt class="py-docstring">                Dado un corpus, genera todas las im&#225;genes de los &#225;rboles de an&#225;lisis de sus oraciones</tt> </tt>
<a name="L308"></a><tt class="py-lineno">308</tt>  <tt class="py-line"><tt class="py-docstring">                @arg bc: Corpus</tt> </tt>
<a name="L309"></a><tt class="py-lineno">309</tt>  <tt class="py-line"><tt class="py-docstring">                @type bc: L{bioscope.BioscopeCorpus}</tt> </tt>
<a name="L310"></a><tt class="py-lineno">310</tt>  <tt class="py-line"><tt class="py-docstring">                @arg only_hedge_and_negation_sentences: indica si generar las im&#225;genes para todas las oraciones, o s&#243;lo para aquellas que incluyen hedges o negaci&#243;n</tt> </tt>
<a name="L311"></a><tt class="py-lineno">311</tt>  <tt class="py-line"><tt class="py-docstring">                @type only_hedge_and_negation_sentences: Bool</tt> </tt>
<a name="L312"></a><tt class="py-lineno">312</tt>  <tt class="py-line"><tt class="py-docstring">                """</tt> </tt>
<a name="L313"></a><tt class="py-lineno">313</tt>  <tt class="py-line">                <tt class="py-keyword">for</tt> <tt class="py-op">(</tt><tt class="py-name">docId</tt><tt class="py-op">,</tt><tt class="py-name">d</tt><tt class="py-op">)</tt> <tt class="py-keyword">in</tt> <tt class="py-name">bc</tt><tt class="py-op">.</tt><tt class="py-name">documents</tt><tt class="py-op">.</tt><tt class="py-name">iteritems</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L314"></a><tt class="py-lineno">314</tt>  <tt class="py-line">                                <tt class="py-keyword">print</tt> <tt class="py-op">&gt;&gt;</tt> <tt class="py-name">stderr</tt><tt class="py-op">,</tt> <tt class="py-string">"Genero im&#225;genes para el documento "</tt><tt class="py-op">,</tt><tt class="py-name">docId</tt> </tt>
<a name="L315"></a><tt class="py-lineno">315</tt>  <tt class="py-line">                                <tt class="py-keyword">for</tt> <tt class="py-op">(</tt><tt class="py-name">sentenceId</tt><tt class="py-op">,</tt><tt class="py-name">sentence</tt><tt class="py-op">)</tt> <tt class="py-keyword">in</tt> <tt class="py-name">d</tt><tt class="py-op">.</tt><tt class="py-name">sentences</tt><tt class="py-op">.</tt><tt class="py-name">iteritems</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L316"></a><tt class="py-lineno">316</tt>  <tt class="py-line">                                        <tt class="py-keyword">if</tt> <tt class="py-name">sentence</tt><tt class="py-op">.</tt><tt class="py-name">data_loaded</tt><tt class="py-op">:</tt> </tt>
<a name="L317"></a><tt class="py-lineno">317</tt>  <tt class="py-line">                                                <tt class="py-keyword">if</tt> <tt class="py-name">sentence</tt><tt class="py-op">.</tt><tt id="link-40" class="py-name" targets="Method pln_inco.bioscope.BioscopeSentence.has_hedging()=pln_inco.bioscope.BioscopeSentence-class.html#has_hedging"><a title="pln_inco.bioscope.BioscopeSentence.has_hedging" class="py-name" href="#" onclick="return doclink('link-40', 'has_hedging', 'link-40');">has_hedging</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> <tt class="py-keyword">or</tt> <tt class="py-name">sentence</tt><tt class="py-op">.</tt><tt id="link-41" class="py-name" targets="Method pln_inco.bioscope.BioscopeSentence.has_negation()=pln_inco.bioscope.BioscopeSentence-class.html#has_negation"><a title="pln_inco.bioscope.BioscopeSentence.has_negation" class="py-name" href="#" onclick="return doclink('link-41', 'has_negation', 'link-41');">has_negation</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> <tt class="py-keyword">or</tt> <tt class="py-keyword">not</tt> <tt class="py-name">only_hedge_and_negation_sentences</tt><tt class="py-op">:</tt> </tt>
<a name="L318"></a><tt class="py-lineno">318</tt>  <tt class="py-line">                                                        <tt class="py-keyword">print</tt> <tt class="py-op">&gt;&gt;</tt> <tt class="py-name">stderr</tt><tt class="py-op">,</tt> <tt class="py-string">"Genero la imagen de la oracion "</tt><tt class="py-op">,</tt><tt class="py-name">sentenceId</tt> </tt>
<a name="L319"></a><tt class="py-lineno">319</tt>  <tt class="py-line">                                                        <tt class="py-name">dot_spec</tt><tt class="py-op">=</tt><tt class="py-name">sentence</tt><tt class="py-op">.</tt><tt id="link-42" class="py-name" targets="Method pln_inco.bioscope.BioscopeSentence.get_dot()=pln_inco.bioscope.BioscopeSentence-class.html#get_dot"><a title="pln_inco.bioscope.BioscopeSentence.get_dot" class="py-name" href="#" onclick="return doclink('link-42', 'get_dot', 'link-42');">get_dot</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L320"></a><tt class="py-lineno">320</tt>  <tt class="py-line">                                                        <tt class="py-name">salida_jpeg</tt><tt class="py-op">=</tt><tt id="link-43" class="py-name"><a title="pln_inco.graphviz" class="py-name" href="#" onclick="return doclink('link-43', 'graphviz', 'link-1');">graphviz</a></tt><tt class="py-op">.</tt><tt id="link-44" class="py-name" targets="Function pln_inco.graphviz.gen_jpeg_output()=pln_inco.graphviz-module.html#gen_jpeg_output"><a title="pln_inco.graphviz.gen_jpeg_output" class="py-name" href="#" onclick="return doclink('link-44', 'gen_jpeg_output', 'link-44');">gen_jpeg_output</a></tt><tt class="py-op">(</tt><tt class="py-name">dot_spec</tt><tt class="py-op">)</tt> </tt>
<a name="L321"></a><tt class="py-lineno">321</tt>  <tt class="py-line">                                                        <tt class="py-name">jpegFileName</tt><tt class="py-op">=</tt><tt class="py-name">docId</tt><tt class="py-op">+</tt><tt class="py-string">'.'</tt><tt class="py-op">+</tt><tt class="py-name">sentenceId</tt><tt class="py-op">+</tt><tt class="py-string">'.jpeg'</tt> </tt>
<a name="L322"></a><tt class="py-lineno">322</tt>  <tt class="py-line">                                                        <tt class="py-name">f</tt><tt class="py-op">=</tt><tt class="py-name">open</tt><tt class="py-op">(</tt><tt class="py-name">os</tt><tt class="py-op">.</tt><tt class="py-name">path</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">image_files_dir</tt><tt class="py-op">,</tt><tt class="py-name">jpegFileName</tt><tt class="py-op">)</tt><tt class="py-op">,</tt><tt class="py-string">"wb+"</tt><tt class="py-op">)</tt> </tt>
<a name="L323"></a><tt class="py-lineno">323</tt>  <tt class="py-line">                                                        <tt class="py-name">f</tt><tt class="py-op">.</tt><tt class="py-name">write</tt><tt class="py-op">(</tt><tt class="py-name">salida_jpeg</tt><tt class="py-op">)</tt> </tt>
<a name="L324"></a><tt class="py-lineno">324</tt>  <tt class="py-line">                                                        <tt class="py-name">f</tt><tt class="py-op">.</tt><tt class="py-name">close</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
</div><a name="L325"></a><tt class="py-lineno">325</tt>  <tt class="py-line"> </tt>
<a name="print_attribute_table"></a><div id="print_attribute_table-def"><a name="L326"></a><tt class="py-lineno">326</tt> <a class="py-toggle" href="#" id="print_attribute_table-toggle" onclick="return toggle('print_attribute_table');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="pln_inco.bioscope.scripts-module.html#print_attribute_table">print_attribute_table</a><tt class="py-op">(</tt><tt class="py-param">bc</tt><tt class="py-op">,</tt><tt class="py-param">bcp</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="print_attribute_table-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="print_attribute_table-expanded"><a name="L327"></a><tt class="py-lineno">327</tt>  <tt class="py-line">        <tt class="py-docstring">""" </tt> </tt>
<a name="L328"></a><tt class="py-lineno">328</tt>  <tt class="py-line"><tt class="py-docstring">        Genera los archivos con la tabulaci&#243;n de los atributos de cada oraci&#243;n de los documentos del corpus</tt> </tt>
<a name="L329"></a><tt class="py-lineno">329</tt>  <tt class="py-line"><tt class="py-docstring">        @arg bc: Corpus</tt> </tt>
<a name="L330"></a><tt class="py-lineno">330</tt>  <tt class="py-line"><tt class="py-docstring">        @type bc: L{bioscope.BioscopeCorpus}</tt> </tt>
<a name="L331"></a><tt class="py-lineno">331</tt>  <tt class="py-line"><tt class="py-docstring"></tt> </tt>
<a name="L332"></a><tt class="py-lineno">332</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L333"></a><tt class="py-lineno">333</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-op">(</tt><tt class="py-name">docId</tt><tt class="py-op">,</tt><tt class="py-name">d</tt><tt class="py-op">)</tt> <tt class="py-keyword">in</tt> <tt class="py-name">bc</tt><tt class="py-op">.</tt><tt class="py-name">documents</tt><tt class="py-op">.</tt><tt class="py-name">iteritems</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L334"></a><tt class="py-lineno">334</tt>  <tt class="py-line">                        <tt class="py-keyword">print</tt> <tt class="py-op">&gt;&gt;</tt> <tt class="py-name">stderr</tt><tt class="py-op">,</tt> <tt class="py-string">"Genero atributos para el documento "</tt><tt class="py-op">,</tt><tt class="py-name">docId</tt>     </tt>
<a name="L335"></a><tt class="py-lineno">335</tt>  <tt class="py-line">                        <tt class="py-keyword">for</tt> <tt class="py-op">(</tt><tt class="py-name">sentenceId</tt><tt class="py-op">,</tt><tt class="py-name">sentence</tt><tt class="py-op">)</tt> <tt class="py-keyword">in</tt> <tt class="py-name">d</tt><tt class="py-op">.</tt><tt class="py-name">sentences</tt><tt class="py-op">.</tt><tt class="py-name">iteritems</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L336"></a><tt class="py-lineno">336</tt>  <tt class="py-line">                                <tt class="py-keyword">if</tt> <tt class="py-name">sentence</tt><tt class="py-op">.</tt><tt class="py-name">data_loaded</tt><tt class="py-op">:</tt> </tt>
<a name="L337"></a><tt class="py-lineno">337</tt>  <tt class="py-line">                                        <tt class="py-name">s_table</tt><tt class="py-op">=</tt><tt class="py-name">sentence</tt><tt class="py-op">.</tt><tt class="py-name">get_att_table</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L338"></a><tt class="py-lineno">338</tt>  <tt class="py-line">                                        <tt class="py-name">tabFileName</tt><tt class="py-op">=</tt><tt class="py-name">docId</tt><tt class="py-op">+</tt><tt class="py-string">'.'</tt><tt class="py-op">+</tt><tt class="py-name">sentenceId</tt><tt class="py-op">+</tt><tt class="py-string">'.html'</tt> </tt>
<a name="L339"></a><tt class="py-lineno">339</tt>  <tt class="py-line">                                        <tt class="py-name">f</tt><tt class="py-op">=</tt><tt class="py-name">open</tt><tt class="py-op">(</tt><tt class="py-name">os</tt><tt class="py-op">.</tt><tt class="py-name">path</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-name">bcp</tt><tt class="py-op">.</tt><tt class="py-name">att_dir</tt><tt class="py-op">,</tt><tt class="py-name">tabFileName</tt><tt class="py-op">)</tt><tt class="py-op">,</tt><tt class="py-string">'w+'</tt><tt class="py-op">)</tt> </tt>
<a name="L340"></a><tt class="py-lineno">340</tt>  <tt class="py-line">                                        <tt class="py-name">f</tt><tt class="py-op">.</tt><tt class="py-name">write</tt><tt class="py-op">(</tt><tt class="py-string">'&lt;HTML&gt;&lt;BODY&gt;&lt;TABLE border=1&gt;\n'</tt><tt class="py-op">)</tt> </tt>
<a name="L341"></a><tt class="py-lineno">341</tt>  <tt class="py-line">                                        <tt class="py-name">content</tt><tt class="py-op">=</tt><tt class="py-string">''</tt> </tt>
<a name="L342"></a><tt class="py-lineno">342</tt>  <tt class="py-line">                                        <tt class="py-name">i</tt><tt class="py-op">=</tt><tt class="py-number">0</tt> </tt>
<a name="L343"></a><tt class="py-lineno">343</tt>  <tt class="py-line">                                        <tt class="py-keyword">for</tt> <tt class="py-name">s</tt> <tt class="py-keyword">in</tt> <tt class="py-name">s_table</tt><tt class="py-op">:</tt> </tt>
<a name="L344"></a><tt class="py-lineno">344</tt>  <tt class="py-line">                                                <tt class="py-name">content</tt> <tt class="py-op">+=</tt><tt class="py-string">'&lt;TR&gt;'</tt> </tt>
<a name="L345"></a><tt class="py-lineno">345</tt>  <tt class="py-line">                                                <tt class="py-keyword">if</tt> <tt class="py-name">i</tt><tt class="py-op">==</tt><tt class="py-number">0</tt><tt class="py-op">:</tt> </tt>
<a name="L346"></a><tt class="py-lineno">346</tt>  <tt class="py-line">                                                        <tt class="py-keyword">for</tt> <tt class="py-name">e</tt> <tt class="py-keyword">in</tt> <tt class="py-name">s</tt><tt class="py-op">:</tt> </tt>
<a name="L347"></a><tt class="py-lineno">347</tt>  <tt class="py-line">                                                                <tt class="py-keyword">if</tt> <tt class="py-name">type</tt><tt class="py-op">(</tt><tt class="py-name">e</tt><tt class="py-op">)</tt><tt class="py-op">==</tt><tt class="py-name">list</tt><tt class="py-op">:</tt> </tt>
<a name="L348"></a><tt class="py-lineno">348</tt>  <tt class="py-line">                                                                        <tt class="py-name">content</tt> <tt class="py-op">+=</tt><tt class="py-string">'&lt;TH&gt;'</tt><tt class="py-op">+</tt><tt class="py-string">'['</tt><tt class="py-op">+</tt><tt class="py-string">','</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-name">e</tt><tt class="py-op">)</tt><tt class="py-op">+</tt><tt class="py-string">']'</tt><tt class="py-op">+</tt><tt class="py-string">'&lt;/TH&gt;'</tt>                                     </tt>
<a name="L349"></a><tt class="py-lineno">349</tt>  <tt class="py-line">                                                                <tt class="py-keyword">else</tt><tt class="py-op">:</tt> </tt>
<a name="L350"></a><tt class="py-lineno">350</tt>  <tt class="py-line">                                                                        <tt class="py-name">content</tt> <tt class="py-op">+=</tt><tt class="py-string">'&lt;TH&gt;'</tt><tt class="py-op">+</tt><tt class="py-name">e</tt><tt class="py-op">+</tt><tt class="py-string">'&lt;/TH&gt;'</tt>                                       </tt>
<a name="L351"></a><tt class="py-lineno">351</tt>  <tt class="py-line">                                                <tt class="py-keyword">else</tt><tt class="py-op">:</tt> </tt>
<a name="L352"></a><tt class="py-lineno">352</tt>  <tt class="py-line">                                                        <tt class="py-keyword">for</tt> <tt class="py-name">e</tt> <tt class="py-keyword">in</tt> <tt class="py-name">s</tt><tt class="py-op">:</tt>      </tt>
<a name="L353"></a><tt class="py-lineno">353</tt>  <tt class="py-line">                                                                <tt class="py-keyword">if</tt> <tt class="py-keyword">not</tt> <tt class="py-name">e</tt><tt class="py-op">:</tt> <tt class="py-name">e</tt><tt class="py-op">=</tt><tt class="py-string">'None'</tt> </tt>
<a name="L354"></a><tt class="py-lineno">354</tt>  <tt class="py-line">                                                                 </tt>
<a name="L355"></a><tt class="py-lineno">355</tt>  <tt class="py-line">                                                                <tt class="py-keyword">if</tt> <tt class="py-name">type</tt><tt class="py-op">(</tt><tt class="py-name">e</tt><tt class="py-op">)</tt><tt class="py-op">==</tt><tt class="py-name">list</tt><tt class="py-op">:</tt> </tt>
<a name="L356"></a><tt class="py-lineno">356</tt>  <tt class="py-line">                                                                        <tt class="py-name">content</tt> <tt class="py-op">+=</tt><tt class="py-string">'&lt;TD&gt;'</tt><tt class="py-op">+</tt><tt class="py-string">'['</tt><tt class="py-op">+</tt><tt class="py-string">','</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-name">e</tt><tt class="py-op">)</tt><tt class="py-op">+</tt><tt class="py-string">']'</tt><tt class="py-op">+</tt><tt class="py-string">'&lt;/TD&gt;'</tt>                                                                                                     </tt>
<a name="L357"></a><tt class="py-lineno">357</tt>  <tt class="py-line">                                                                <tt class="py-keyword">else</tt><tt class="py-op">:</tt> </tt>
<a name="L358"></a><tt class="py-lineno">358</tt>  <tt class="py-line">                                                                        <tt class="py-name">content</tt> <tt class="py-op">+=</tt><tt class="py-string">'&lt;TD&gt;'</tt><tt class="py-op">+</tt><tt class="py-name">e</tt><tt class="py-op">+</tt><tt class="py-string">'&lt;/TD&gt;'</tt> </tt>
<a name="L359"></a><tt class="py-lineno">359</tt>  <tt class="py-line">                                                <tt class="py-name">content</tt><tt class="py-op">=</tt><tt class="py-name">content</tt><tt class="py-op">+</tt><tt class="py-string">'&lt;/TR&gt;\n'</tt> </tt>
<a name="L360"></a><tt class="py-lineno">360</tt>  <tt class="py-line">                                                <tt class="py-name">i</tt><tt class="py-op">=</tt><tt class="py-name">i</tt><tt class="py-op">+</tt><tt class="py-number">1</tt> </tt>
<a name="L361"></a><tt class="py-lineno">361</tt>  <tt class="py-line">                                        <tt class="py-name">f</tt><tt class="py-op">.</tt><tt class="py-name">write</tt><tt class="py-op">(</tt><tt class="py-name">content</tt><tt class="py-op">)</tt> </tt>
<a name="L362"></a><tt class="py-lineno">362</tt>  <tt class="py-line">                                        <tt class="py-name">f</tt><tt class="py-op">.</tt><tt class="py-name">write</tt><tt class="py-op">(</tt><tt class="py-string">'&lt;/TABLE&gt;&lt;/BODY&gt;&lt;/HTML&gt;'</tt><tt class="py-op">)</tt> </tt>
<a name="L363"></a><tt class="py-lineno">363</tt>  <tt class="py-line">                                        <tt class="py-name">f</tt><tt class="py-op">.</tt><tt class="py-name">close</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
</div><a name="L364"></a><tt class="py-lineno">364</tt>  <tt class="py-line"> </tt>
<a name="L365"></a><tt class="py-lineno">365</tt>  <tt class="py-line"> </tt>
<a name="save_basic_attributes"></a><div id="save_basic_attributes-def"><a name="L366"></a><tt class="py-lineno">366</tt> <a class="py-toggle" href="#" id="save_basic_attributes-toggle" onclick="return toggle('save_basic_attributes');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="pln_inco.bioscope.scripts-module.html#save_basic_attributes">save_basic_attributes</a><tt class="py-op">(</tt><tt class="py-param">bc</tt><tt class="py-op">,</tt><tt class="py-param">dbname</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="save_basic_attributes-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="save_basic_attributes-expanded"><a name="L367"></a><tt class="py-lineno">367</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L368"></a><tt class="py-lineno">368</tt>  <tt class="py-line"><tt class="py-docstring">        Dado un corpus en memoria, persiste en la base de datos los atributos b&#225;sicos. </tt> </tt>
<a name="L369"></a><tt class="py-lineno">369</tt>  <tt class="py-line"><tt class="py-docstring">        La idea es que esto se corra sobre la tabla vac&#237;a.</tt> </tt>
<a name="L370"></a><tt class="py-lineno">370</tt>  <tt class="py-line"><tt class="py-docstring">        @arg bc: Corpus</tt> </tt>
<a name="L371"></a><tt class="py-lineno">371</tt>  <tt class="py-line"><tt class="py-docstring">        @type bc: L{bioscope.BioscopeCorpus}</tt> </tt>
<a name="L372"></a><tt class="py-lineno">372</tt>  <tt class="py-line"><tt class="py-docstring">        @arg dbname: nombre del archivo que tiene la base de datos</tt> </tt>
<a name="L373"></a><tt class="py-lineno">373</tt>  <tt class="py-line"><tt class="py-docstring">        @type dbname:C{string}</tt> </tt>
<a name="L374"></a><tt class="py-lineno">374</tt>  <tt class="py-line"><tt class="py-docstring">        </tt> </tt>
<a name="L375"></a><tt class="py-lineno">375</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L376"></a><tt class="py-lineno">376</tt>  <tt class="py-line"> </tt>
<a name="L377"></a><tt class="py-lineno">377</tt>  <tt class="py-line">        <tt class="py-name">t0</tt><tt class="py-op">=</tt><tt class="py-name">time</tt><tt class="py-op">.</tt><tt class="py-name">clock</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L378"></a><tt class="py-lineno">378</tt>  <tt class="py-line">        <tt class="py-name">conn</tt><tt class="py-op">=</tt> <tt class="py-name">sqlite3</tt><tt class="py-op">.</tt><tt class="py-name">connect</tt><tt class="py-op">(</tt><tt class="py-name">dbname</tt><tt class="py-op">)</tt>    </tt>
<a name="L379"></a><tt class="py-lineno">379</tt>  <tt class="py-line">        <tt class="py-name">conn</tt><tt class="py-op">.</tt><tt class="py-name">text_factory</tt> <tt class="py-op">=</tt> <tt class="py-name">str</tt> </tt>
<a name="L380"></a><tt class="py-lineno">380</tt>  <tt class="py-line">         </tt>
<a name="L381"></a><tt class="py-lineno">381</tt>  <tt class="py-line">        <tt class="py-name">c</tt><tt class="py-op">=</tt><tt class="py-name">conn</tt><tt class="py-op">.</tt><tt class="py-name">cursor</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L382"></a><tt class="py-lineno">382</tt>  <tt class="py-line">        <tt class="py-name">c</tt><tt class="py-op">.</tt><tt class="py-name">execute</tt><tt class="py-op">(</tt><tt class="py-string">'delete from bioscope'</tt><tt class="py-op">)</tt> </tt>
<a name="L383"></a><tt class="py-lineno">383</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-op">(</tt><tt class="py-name">docId</tt><tt class="py-op">,</tt><tt class="py-name">d</tt><tt class="py-op">)</tt> <tt class="py-keyword">in</tt> <tt class="py-name">bc</tt><tt class="py-op">.</tt><tt class="py-name">documents</tt><tt class="py-op">.</tt><tt class="py-name">iteritems</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L384"></a><tt class="py-lineno">384</tt>  <tt class="py-line">                        <tt class="py-keyword">print</tt> <tt class="py-op">&gt;&gt;</tt> <tt class="py-name">stderr</tt><tt class="py-op">,</tt> <tt class="py-string">"Genero atributos para el documento "</tt><tt class="py-op">,</tt><tt class="py-name">docId</tt>     </tt>
<a name="L385"></a><tt class="py-lineno">385</tt>  <tt class="py-line">                        <tt class="py-keyword">for</tt> <tt class="py-op">(</tt><tt class="py-name">sentenceId</tt><tt class="py-op">,</tt><tt class="py-name">sentence</tt><tt class="py-op">)</tt> <tt class="py-keyword">in</tt> <tt class="py-name">d</tt><tt class="py-op">.</tt><tt class="py-name">sentences</tt><tt class="py-op">.</tt><tt class="py-name">iteritems</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L386"></a><tt class="py-lineno">386</tt>  <tt class="py-line">                                <tt class="py-name">token_num</tt><tt class="py-op">=</tt><tt class="py-number">0</tt> </tt>
<a name="L387"></a><tt class="py-lineno">387</tt>  <tt class="py-line">                                <tt class="py-keyword">if</tt> <tt class="py-name">sentence</tt><tt class="py-op">.</tt><tt class="py-name">data_loaded</tt><tt class="py-op">:</tt> </tt>
<a name="L388"></a><tt class="py-lineno">388</tt>  <tt class="py-line">                                        <tt class="py-name">s_table</tt><tt class="py-op">=</tt><tt class="py-name">sentence</tt><tt class="py-op">.</tt><tt id="link-45" class="py-name" targets="Method pln_inco.bioscope.BioscopeSentence.get_basic_attributes()=pln_inco.bioscope.BioscopeSentence-class.html#get_basic_attributes"><a title="pln_inco.bioscope.BioscopeSentence.get_basic_attributes" class="py-name" href="#" onclick="return doclink('link-45', 'get_basic_attributes', 'link-45');">get_basic_attributes</a></tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L389"></a><tt class="py-lineno">389</tt>  <tt class="py-line">                                        <tt class="py-keyword">for</tt> <tt class="py-name">s</tt> <tt class="py-keyword">in</tt> <tt class="py-name">s_table</tt><tt class="py-op">[</tt><tt class="py-number">1</tt><tt class="py-op">:</tt><tt class="py-op">]</tt><tt class="py-op">:</tt> </tt>
<a name="L390"></a><tt class="py-lineno">390</tt>  <tt class="py-line">                                                <tt class="py-name">token_num</tt><tt class="py-op">=</tt><tt class="py-name">token_num</tt><tt class="py-op">+</tt><tt class="py-number">1</tt> </tt>
<a name="L391"></a><tt class="py-lineno">391</tt>  <tt class="py-line">                                                <tt class="py-comment">#s[0]=word</tt> </tt>
<a name="L392"></a><tt class="py-lineno">392</tt>  <tt class="py-line">                                                <tt class="py-comment">#s[1]=lemma</tt> </tt>
<a name="L393"></a><tt class="py-lineno">393</tt>  <tt class="py-line">                                                <tt class="py-comment">#s[2]=POS</tt> </tt>
<a name="L394"></a><tt class="py-lineno">394</tt>  <tt class="py-line">                                                <tt class="py-comment">#s[3]=Chunk</tt> </tt>
<a name="L395"></a><tt class="py-lineno">395</tt>  <tt class="py-line">                                                <tt class="py-comment">#s[4]=NER</tt> </tt>
<a name="L396"></a><tt class="py-lineno">396</tt>  <tt class="py-line">                                                <tt class="py-comment">#s[5]= lista de HEDGE_CUES</tt> </tt>
<a name="L397"></a><tt class="py-lineno">397</tt>  <tt class="py-line">                                                <tt class="py-comment">#s[6]= lista de NEG_CUES</tt> </tt>
<a name="L398"></a><tt class="py-lineno">398</tt>  <tt class="py-line">                                                <tt class="py-comment">#s[7]= lista de SPEC_XCOPES</tt> </tt>
<a name="L399"></a><tt class="py-lineno">399</tt>  <tt class="py-line">                                                 </tt>
<a name="L400"></a><tt class="py-lineno">400</tt>  <tt class="py-line">                                                 </tt>
<a name="L401"></a><tt class="py-lineno">401</tt>  <tt class="py-line">                                                <tt class="py-comment"># Proceso las hedge cues, genero un m&#225;ximo de tres columnas para reflejar el anidamiento</tt> </tt>
<a name="L402"></a><tt class="py-lineno">402</tt>  <tt class="py-line">                                                <tt class="py-name">hedge_cue</tt><tt class="py-op">=</tt><tt class="py-op">[</tt><tt class="py-string">'O'</tt><tt class="py-op">]</tt><tt class="py-op">*</tt><tt class="py-number">3</tt> </tt>
<a name="L403"></a><tt class="py-lineno">403</tt>  <tt class="py-line">                                                <tt class="py-name">i</tt><tt class="py-op">=</tt><tt class="py-number">0</tt> </tt>
<a name="L404"></a><tt class="py-lineno">404</tt>  <tt class="py-line">                                                <tt class="py-keyword">for</tt> <tt class="py-name">elem</tt> <tt class="py-keyword">in</tt> <tt class="py-name">s</tt><tt class="py-op">[</tt><tt class="py-number">5</tt><tt class="py-op">]</tt><tt class="py-op">:</tt> </tt>
<a name="L405"></a><tt class="py-lineno">405</tt>  <tt class="py-line">                                                        <tt class="py-name">hedge_cue</tt><tt class="py-op">[</tt><tt class="py-name">i</tt><tt class="py-op">]</tt><tt class="py-op">=</tt><tt class="py-name">elem</tt> </tt>
<a name="L406"></a><tt class="py-lineno">406</tt>  <tt class="py-line">                                                        <tt class="py-name">i</tt><tt class="py-op">=</tt><tt class="py-name">i</tt><tt class="py-op">+</tt><tt class="py-number">1</tt> </tt>
<a name="L407"></a><tt class="py-lineno">407</tt>  <tt class="py-line">                                                        <tt class="py-keyword">if</tt> <tt class="py-name">i</tt><tt class="py-op">==</tt><tt class="py-number">3</tt><tt class="py-op">:</tt> </tt>
<a name="L408"></a><tt class="py-lineno">408</tt>  <tt class="py-line">                                                                <tt class="py-keyword">break</tt> </tt>
<a name="L409"></a><tt class="py-lineno">409</tt>  <tt class="py-line">                                                                 </tt>
<a name="L410"></a><tt class="py-lineno">410</tt>  <tt class="py-line">                                                <tt class="py-keyword">if</tt> <tt class="py-name">hedge_cue</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">]</tt> <tt class="py-op">!=</tt> <tt class="py-string">'O'</tt><tt class="py-op">:</tt> </tt>
<a name="L411"></a><tt class="py-lineno">411</tt>  <tt class="py-line">                                                        <tt class="py-name">hc</tt><tt class="py-op">=</tt><tt class="py-name">hedge_cue</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">]</tt> </tt>
<a name="L412"></a><tt class="py-lineno">412</tt>  <tt class="py-line">                                                <tt class="py-keyword">elif</tt> <tt class="py-name">hedge_cue</tt><tt class="py-op">[</tt><tt class="py-number">1</tt><tt class="py-op">]</tt> <tt class="py-op">!=</tt> <tt class="py-string">'O'</tt><tt class="py-op">:</tt> </tt>
<a name="L413"></a><tt class="py-lineno">413</tt>  <tt class="py-line">                                                        <tt class="py-name">hc</tt><tt class="py-op">=</tt><tt class="py-name">hedge_cue</tt><tt class="py-op">[</tt><tt class="py-number">1</tt><tt class="py-op">]</tt> </tt>
<a name="L414"></a><tt class="py-lineno">414</tt>  <tt class="py-line">                                                <tt class="py-keyword">elif</tt> <tt class="py-name">hedge_cue</tt><tt class="py-op">[</tt><tt class="py-number">2</tt><tt class="py-op">]</tt> <tt class="py-op">!=</tt> <tt class="py-string">'O'</tt><tt class="py-op">:</tt> </tt>
<a name="L415"></a><tt class="py-lineno">415</tt>  <tt class="py-line">                                                        <tt class="py-name">hc</tt><tt class="py-op">=</tt><tt class="py-name">hedge_cue</tt><tt class="py-op">[</tt><tt class="py-number">2</tt><tt class="py-op">]</tt> </tt>
<a name="L416"></a><tt class="py-lineno">416</tt>  <tt class="py-line">                                                <tt class="py-keyword">else</tt><tt class="py-op">:</tt> </tt>
<a name="L417"></a><tt class="py-lineno">417</tt>  <tt class="py-line">                                                        <tt class="py-name">hc</tt><tt class="py-op">=</tt><tt class="py-string">'O'</tt>   </tt>
<a name="L418"></a><tt class="py-lineno">418</tt>  <tt class="py-line">                                                   </tt>
<a name="L419"></a><tt class="py-lineno">419</tt>  <tt class="py-line">                                                 </tt>
<a name="L420"></a><tt class="py-lineno">420</tt>  <tt class="py-line">                                                <tt class="py-comment"># Proceso las marcas de scope</tt> </tt>
<a name="L421"></a><tt class="py-lineno">421</tt>  <tt class="py-line">                                                <tt class="py-comment"># Genero un m&#225;ximo de 3 columnas. Si no tienen nada, les pongo O</tt> </tt>
<a name="L422"></a><tt class="py-lineno">422</tt>  <tt class="py-line">                                                <tt class="py-name">hedge_xcope</tt><tt class="py-op">=</tt><tt class="py-op">[</tt><tt class="py-string">'O'</tt><tt class="py-op">]</tt><tt class="py-op">*</tt><tt class="py-number">3</tt> </tt>
<a name="L423"></a><tt class="py-lineno">423</tt>  <tt class="py-line">                                                <tt class="py-name">i</tt><tt class="py-op">=</tt><tt class="py-number">0</tt> </tt>
<a name="L424"></a><tt class="py-lineno">424</tt>  <tt class="py-line">                                                <tt class="py-keyword">for</tt> <tt class="py-name">elem</tt> <tt class="py-keyword">in</tt> <tt class="py-name">s</tt><tt class="py-op">[</tt><tt class="py-number">7</tt><tt class="py-op">]</tt><tt class="py-op">:</tt> </tt>
<a name="L425"></a><tt class="py-lineno">425</tt>  <tt class="py-line">                                                        <tt class="py-name">hedge_xcope</tt><tt class="py-op">[</tt><tt class="py-name">i</tt><tt class="py-op">]</tt><tt class="py-op">=</tt><tt class="py-name">elem</tt> </tt>
<a name="L426"></a><tt class="py-lineno">426</tt>  <tt class="py-line">                                                        <tt class="py-name">i</tt><tt class="py-op">=</tt><tt class="py-name">i</tt><tt class="py-op">+</tt><tt class="py-number">1</tt> </tt>
<a name="L427"></a><tt class="py-lineno">427</tt>  <tt class="py-line">                                                        <tt class="py-keyword">if</tt> <tt class="py-name">i</tt><tt class="py-op">==</tt><tt class="py-number">3</tt><tt class="py-op">:</tt> </tt>
<a name="L428"></a><tt class="py-lineno">428</tt>  <tt class="py-line">                                                                <tt class="py-keyword">break</tt> </tt>
<a name="L429"></a><tt class="py-lineno">429</tt>  <tt class="py-line">                                                 </tt>
<a name="L430"></a><tt class="py-lineno">430</tt>  <tt class="py-line">                                                <tt class="py-comment">#print docId, sentenceId,token_num, s[0]</tt> </tt>
<a name="L431"></a><tt class="py-lineno">431</tt>  <tt class="py-line">                                                <tt class="py-name">c</tt><tt class="py-op">.</tt><tt class="py-name">execute</tt><tt class="py-op">(</tt><tt class="py-string">"""insert into bioscope (document_id,sentence_id,token_num,word,lemma,POS,CHUNK,NER,hedge_cue,hedge_cue1, hedge_cue2, hedge_cue3,hedge_xcope1,hedge_xcope2,hedge_xcope3) </tt> </tt>
<a name="L432"></a><tt class="py-lineno">432</tt>  <tt class="py-line"><tt class="py-string">                                                values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"""</tt><tt class="py-op">,</tt> </tt>
<a name="L433"></a><tt class="py-lineno">433</tt>  <tt class="py-line">                                                <tt class="py-op">(</tt><tt class="py-name">docId</tt><tt class="py-op">,</tt><tt class="py-name">sentenceId</tt><tt class="py-op">,</tt><tt class="py-name">token_num</tt><tt class="py-op">,</tt><tt class="py-name">s</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">]</tt><tt class="py-op">,</tt><tt class="py-name">s</tt><tt class="py-op">[</tt><tt class="py-number">1</tt><tt class="py-op">]</tt><tt class="py-op">,</tt><tt class="py-name">s</tt><tt class="py-op">[</tt><tt class="py-number">2</tt><tt class="py-op">]</tt><tt class="py-op">,</tt><tt class="py-name">s</tt><tt class="py-op">[</tt><tt class="py-number">3</tt><tt class="py-op">]</tt><tt class="py-op">,</tt><tt class="py-name">s</tt><tt class="py-op">[</tt><tt class="py-number">4</tt><tt class="py-op">]</tt><tt class="py-op">,</tt><tt class="py-name">hc</tt><tt class="py-op">,</tt><tt class="py-name">hedge_cue</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">]</tt><tt class="py-op">,</tt><tt class="py-name">hedge_cue</tt><tt class="py-op">[</tt><tt class="py-number">1</tt><tt class="py-op">]</tt><tt class="py-op">,</tt><tt class="py-name">hedge_cue</tt><tt class="py-op">[</tt><tt class="py-number">2</tt><tt class="py-op">]</tt><tt class="py-op">,</tt><tt class="py-name">hedge_xcope</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">]</tt><tt class="py-op">,</tt><tt class="py-name">hedge_xcope</tt><tt class="py-op">[</tt><tt class="py-number">1</tt><tt class="py-op">]</tt><tt class="py-op">,</tt><tt class="py-name">hedge_xcope</tt><tt class="py-op">[</tt><tt class="py-number">2</tt><tt class="py-op">]</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L434"></a><tt class="py-lineno">434</tt>  <tt class="py-line">        <tt class="py-name">c</tt><tt class="py-op">.</tt><tt class="py-name">close</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L435"></a><tt class="py-lineno">435</tt>  <tt class="py-line">        <tt class="py-name">conn</tt><tt class="py-op">.</tt><tt class="py-name">commit</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L436"></a><tt class="py-lineno">436</tt>  <tt class="py-line">        <tt class="py-keyword">print</tt> <tt class="py-string">'Tiempo del proceso:'</tt><tt class="py-op">,</tt> <tt class="py-name">time</tt><tt class="py-op">.</tt><tt class="py-name">clock</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">-</tt><tt class="py-name">t0</tt> </tt>
</div><a name="L437"></a><tt class="py-lineno">437</tt>  <tt class="py-line"> </tt>
<a name="L438"></a><tt class="py-lineno">438</tt>  <tt class="py-line"> </tt>
<!-- Highlighted source listing (source lines 439 to 484) of the Python function
     split_training_corpus(dbname).
     Markup layout: the "split_training_corpus-def" div holds the def line and the toggle
     link (its onclick calls toggle('split_training_corpus'), presumably defined in
     epydoc.js); the "split_training_corpus-collapsed" div is an empty placeholder that
     starts hidden (display:none); the "split_training_corpus-expanded" div holds the
     function body and is closed at the start of the line numbered 484.
     What the listed code does: connects to the SQLite file dbname, drops the tables
     bioscope80 and bioscope20 (sqlite3.OperationalError is ignored), recreates both empty
     from "select * from bioscope where 0=1", then for every row returned by the
     "group by sentence_id" query draws random.random() and copies all bioscope rows with
     that document_id and sentence_id into bioscope80 when the draw is below 0.8,
     otherwise into bioscope20; finally it commits and closes both cursors.
     Docstring (Spanish) in English: splits the bioscope table 80/20 keeping whatever
     attributes it currently has, and rewrites the tables bioscope80 and bioscope20;
     dbname is the name of the file that holds the database.
     NOTE(review): the sentence query groups by sentence_id only, while the insert filters
     on both document_id and sentence_id; if a sentence_id can occur in more than one
     document, only one document per id would be copied. Confirm against the data.
     NOTE(review): t0 is assigned from time.clock() but is not read in the listed lines,
     and conn is not closed in the listed lines.
     NOTE(review): this page looks like generated epydoc output, so any correction
     presumably belongs in the Python source file rather than here. TODO confirm.
--><a name="split_training_corpus"></a><div id="split_training_corpus-def"><a name="L439"></a><tt class="py-lineno">439</tt> <a class="py-toggle" href="#" id="split_training_corpus-toggle" onclick="return toggle('split_training_corpus');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="pln_inco.bioscope.scripts-module.html#split_training_corpus">split_training_corpus</a><tt class="py-op">(</tt><tt class="py-param">dbname</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="split_training_corpus-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="split_training_corpus-expanded"><a name="L440"></a><tt class="py-lineno">440</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L441"></a><tt class="py-lineno">441</tt>  <tt class="py-line"><tt class="py-docstring">        Separa la tabla bioscope en 80/20, manteniendo los atributos que tenga actualmente</tt> </tt>
<a name="L442"></a><tt class="py-lineno">442</tt>  <tt class="py-line"><tt class="py-docstring">        Reescribe las tablas bioscope80 y bioscope20.</tt> </tt>
<a name="L443"></a><tt class="py-lineno">443</tt>  <tt class="py-line"><tt class="py-docstring">        @arg dbname: nombre del archivo que tiene la base de datos</tt> </tt>
<a name="L444"></a><tt class="py-lineno">444</tt>  <tt class="py-line"><tt class="py-docstring">        @type dbname:C{string}</tt> </tt>
<a name="L445"></a><tt class="py-lineno">445</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L446"></a><tt class="py-lineno">446</tt>  <tt class="py-line">         </tt>
<a name="L447"></a><tt class="py-lineno">447</tt>  <tt class="py-line">        <tt class="py-comment">#Abro la tabla bioscope en dbfile</tt> </tt>
<a name="L448"></a><tt class="py-lineno">448</tt>  <tt class="py-line">        <tt class="py-name">t0</tt><tt class="py-op">=</tt><tt class="py-name">time</tt><tt class="py-op">.</tt><tt class="py-name">clock</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L449"></a><tt class="py-lineno">449</tt>  <tt class="py-line">        <tt class="py-name">conn</tt><tt class="py-op">=</tt> <tt class="py-name">sqlite3</tt><tt class="py-op">.</tt><tt class="py-name">connect</tt><tt class="py-op">(</tt><tt class="py-name">dbname</tt><tt class="py-op">)</tt>    </tt>
<a name="L450"></a><tt class="py-lineno">450</tt>  <tt class="py-line">        <tt class="py-name">conn</tt><tt class="py-op">.</tt><tt class="py-name">text_factory</tt> <tt class="py-op">=</tt> <tt class="py-name">str</tt> </tt>
<a name="L451"></a><tt class="py-lineno">451</tt>  <tt class="py-line">        <tt class="py-name">conn</tt><tt class="py-op">.</tt><tt class="py-name">row_factory</tt><tt class="py-op">=</tt><tt class="py-name">sqlite3</tt><tt class="py-op">.</tt><tt class="py-name">Row</tt> </tt>
<a name="L452"></a><tt class="py-lineno">452</tt>  <tt class="py-line">        <tt class="py-name">c</tt><tt class="py-op">=</tt><tt class="py-name">conn</tt><tt class="py-op">.</tt><tt class="py-name">cursor</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L453"></a><tt class="py-lineno">453</tt>  <tt class="py-line">        <tt class="py-name">c2</tt><tt class="py-op">=</tt><tt class="py-name">conn</tt><tt class="py-op">.</tt><tt class="py-name">cursor</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L454"></a><tt class="py-lineno">454</tt>  <tt class="py-line"> </tt>
<a name="L455"></a><tt class="py-lineno">455</tt>  <tt class="py-line">        <tt class="py-comment"># Dropeo las tablas bioscope80 y bioscope20</tt> </tt>
<a name="L456"></a><tt class="py-lineno">456</tt>  <tt class="py-line">        <tt class="py-comment"># y las creo de nuevo, vac&#237;as</tt> </tt>
<a name="L457"></a><tt class="py-lineno">457</tt>  <tt class="py-line">        <tt class="py-keyword">try</tt><tt class="py-op">:</tt> </tt>
<a name="L458"></a><tt class="py-lineno">458</tt>  <tt class="py-line">                <tt class="py-name">c</tt><tt class="py-op">.</tt><tt class="py-name">execute</tt><tt class="py-op">(</tt><tt class="py-string">'drop table bioscope80'</tt><tt class="py-op">)</tt> </tt>
<a name="L459"></a><tt class="py-lineno">459</tt>  <tt class="py-line">        <tt class="py-keyword">except</tt> <tt class="py-name">sqlite3</tt><tt class="py-op">.</tt><tt class="py-name">OperationalError</tt><tt class="py-op">:</tt> </tt>
<a name="L460"></a><tt class="py-lineno">460</tt>  <tt class="py-line">                <tt class="py-keyword">pass</tt> </tt>
<a name="L461"></a><tt class="py-lineno">461</tt>  <tt class="py-line">                 </tt>
<a name="L462"></a><tt class="py-lineno">462</tt>  <tt class="py-line">        <tt class="py-keyword">try</tt><tt class="py-op">:</tt> </tt>
<a name="L463"></a><tt class="py-lineno">463</tt>  <tt class="py-line">                <tt class="py-name">c</tt><tt class="py-op">.</tt><tt class="py-name">execute</tt><tt class="py-op">(</tt><tt class="py-string">'drop table bioscope20'</tt><tt class="py-op">)</tt> </tt>
<a name="L464"></a><tt class="py-lineno">464</tt>  <tt class="py-line">        <tt class="py-keyword">except</tt> <tt class="py-name">sqlite3</tt><tt class="py-op">.</tt><tt class="py-name">OperationalError</tt><tt class="py-op">:</tt> </tt>
<a name="L465"></a><tt class="py-lineno">465</tt>  <tt class="py-line">                <tt class="py-keyword">pass</tt> </tt>
<a name="L466"></a><tt class="py-lineno">466</tt>  <tt class="py-line"> </tt>
<a name="L467"></a><tt class="py-lineno">467</tt>  <tt class="py-line">        <tt class="py-name">c</tt><tt class="py-op">.</tt><tt class="py-name">execute</tt><tt class="py-op">(</tt><tt class="py-string">'create table bioscope80 as select * from bioscope where 0=1'</tt><tt class="py-op">)</tt> </tt>
<a name="L468"></a><tt class="py-lineno">468</tt>  <tt class="py-line">        <tt class="py-name">c</tt><tt class="py-op">.</tt><tt class="py-name">execute</tt><tt class="py-op">(</tt><tt class="py-string">'create table bioscope20 as select * from bioscope where 0=1'</tt><tt class="py-op">)</tt> </tt>
<a name="L469"></a><tt class="py-lineno">469</tt>  <tt class="py-line">        <tt class="py-name">conn</tt><tt class="py-op">.</tt><tt class="py-name">commit</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L470"></a><tt class="py-lineno">470</tt>  <tt class="py-line">         </tt>
<a name="L471"></a><tt class="py-lineno">471</tt>  <tt class="py-line">         </tt>
<a name="L472"></a><tt class="py-lineno">472</tt>  <tt class="py-line">        <tt class="py-comment"># Recorro la tabla bioscope y separo el 80% de las oraciones entre entrenamiento y testeo</tt> </tt>
<a name="L473"></a><tt class="py-lineno">473</tt>  <tt class="py-line">        <tt class="py-name">c</tt><tt class="py-op">.</tt><tt class="py-name">execute</tt><tt class="py-op">(</tt><tt class="py-string">'select document_id,sentence_id from bioscope group by sentence_id'</tt><tt class="py-op">)</tt> </tt>
<a name="L474"></a><tt class="py-lineno">474</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-name">row</tt> <tt class="py-keyword">in</tt> <tt class="py-name">c</tt><tt class="py-op">:</tt> </tt>
<a name="L475"></a><tt class="py-lineno">475</tt>  <tt class="py-line">                <tt class="py-comment">#Sorteo</tt> </tt>
<a name="L476"></a><tt class="py-lineno">476</tt>  <tt class="py-line">                <tt class="py-keyword">if</tt> <tt class="py-name">random</tt><tt class="py-op">.</tt><tt class="py-name">random</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">&lt;</tt><tt class="py-number">0.8</tt><tt class="py-op">:</tt> </tt>
<a name="L477"></a><tt class="py-lineno">477</tt>  <tt class="py-line">                        <tt class="py-name">target_table</tt><tt class="py-op">=</tt><tt class="py-string">'bioscope80'</tt><tt class="py-op">;</tt> </tt>
<a name="L478"></a><tt class="py-lineno">478</tt>  <tt class="py-line">                <tt class="py-keyword">else</tt><tt class="py-op">:</tt> </tt>
<a name="L479"></a><tt class="py-lineno">479</tt>  <tt class="py-line">                        <tt class="py-name">target_table</tt><tt class="py-op">=</tt><tt class="py-string">'bioscope20'</tt><tt class="py-op">;</tt> </tt>
<a name="L480"></a><tt class="py-lineno">480</tt>  <tt class="py-line">                <tt class="py-name">c2</tt><tt class="py-op">.</tt><tt class="py-name">execute</tt><tt class="py-op">(</tt><tt class="py-string">'insert into '</tt><tt class="py-op">+</tt><tt class="py-name">target_table</tt><tt class="py-op">+</tt><tt class="py-string">' select * from bioscope where document_id=? and sentence_id=?'</tt><tt class="py-op">,</tt><tt class="py-op">(</tt><tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-string">'document_id'</tt><tt class="py-op">]</tt><tt class="py-op">,</tt><tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-string">'sentence_id'</tt><tt class="py-op">]</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L481"></a><tt class="py-lineno">481</tt>  <tt class="py-line">        <tt class="py-name">conn</tt><tt class="py-op">.</tt><tt class="py-name">commit</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L482"></a><tt class="py-lineno">482</tt>  <tt class="py-line">        <tt class="py-name">c2</tt><tt class="py-op">.</tt><tt class="py-name">close</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L483"></a><tt class="py-lineno">483</tt>  <tt class="py-line">        <tt class="py-name">c</tt><tt class="py-op">.</tt><tt class="py-name">close</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
</div><a name="L484"></a><tt class="py-lineno">484</tt>  <tt class="py-line">                         </tt>
<a name="generate_scope_analysis_table"></a><div id="generate_scope_analysis_table-def"><a name="L485"></a><tt class="py-lineno">485</tt> <a class="py-toggle" href="#" id="generate_scope_analysis_table-toggle" onclick="return toggle('generate_scope_analysis_table');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="pln_inco.bioscope.scripts-module.html#generate_scope_analysis_table">generate_scope_analysis_table</a><tt class="py-op">(</tt><tt class="py-param">dbname</tt><tt class="py-op">,</tt><tt class="py-param">source_table</tt><tt class="py-op">,</tt><tt class="py-param">target_table</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="generate_scope_analysis_table-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="generate_scope_analysis_table-expanded"><a name="L486"></a><tt class="py-lineno">486</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L487"></a><tt class="py-lineno">487</tt>  <tt class="py-line"><tt class="py-docstring">        Toma una tabla, recorre las oraciones y por cada instancia de HEDGE CUE que encuentra,</tt> </tt>
<a name="L488"></a><tt class="py-lineno">488</tt>  <tt class="py-line"><tt class="py-docstring">        genera una instancia de entrenamiento con la oraci&#243;n y la identificaci&#243;n del HC (adem&#225;s de los atributos que ya ten&#237;a).</tt> </tt>
<a name="L489"></a><tt class="py-lineno">489</tt>  <tt class="py-line"><tt class="py-docstring">        Convierte el scope al formato FOL</tt> </tt>
<a name="L490"></a><tt class="py-lineno">490</tt>  <tt class="py-line"><tt class="py-docstring">        @arg dbname: nombre del archivo que tiene la base de datos</tt> </tt>
<a name="L491"></a><tt class="py-lineno">491</tt>  <tt class="py-line"><tt class="py-docstring">        @type dbname:C{string}</tt> </tt>
<a name="L492"></a><tt class="py-lineno">492</tt>  <tt class="py-line"><tt class="py-docstring">        @arg source_table: nombre de la tabla origen (bioscope80)</tt> </tt>
<a name="L493"></a><tt class="py-lineno">493</tt>  <tt class="py-line"><tt class="py-docstring">        @type source_table:C{string}</tt> </tt>
<a name="L494"></a><tt class="py-lineno">494</tt>  <tt class="py-line"><tt class="py-docstring">        @arg target_table: nombre de la tabla destino (bioscope80_scope)</tt> </tt>
<a name="L495"></a><tt class="py-lineno">495</tt>  <tt class="py-line"><tt class="py-docstring">        @type target_table:C{string}</tt> </tt>
<a name="L496"></a><tt class="py-lineno">496</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L497"></a><tt class="py-lineno">497</tt>  <tt class="py-line"> </tt>
<a name="L498"></a><tt class="py-lineno">498</tt>  <tt class="py-line">        <tt class="py-comment">#Conexi&#243;n a la base de datos</tt> </tt>
<a name="L499"></a><tt class="py-lineno">499</tt>  <tt class="py-line">        <tt class="py-name">t0</tt><tt class="py-op">=</tt><tt class="py-name">time</tt><tt class="py-op">.</tt><tt class="py-name">clock</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L500"></a><tt class="py-lineno">500</tt>  <tt class="py-line">        <tt class="py-name">conn</tt><tt class="py-op">=</tt> <tt class="py-name">sqlite3</tt><tt class="py-op">.</tt><tt class="py-name">connect</tt><tt class="py-op">(</tt><tt class="py-name">dbname</tt><tt class="py-op">)</tt>    </tt>
<a name="L501"></a><tt class="py-lineno">501</tt>  <tt class="py-line">        <tt class="py-name">conn</tt><tt class="py-op">.</tt><tt class="py-name">text_factory</tt> <tt class="py-op">=</tt> <tt class="py-name">str</tt> </tt>
<a name="L502"></a><tt class="py-lineno">502</tt>  <tt class="py-line">        <tt class="py-name">conn</tt><tt class="py-op">.</tt><tt class="py-name">row_factory</tt><tt class="py-op">=</tt><tt class="py-name">sqlite3</tt><tt class="py-op">.</tt><tt class="py-name">Row</tt> </tt>
<a name="L503"></a><tt class="py-lineno">503</tt>  <tt class="py-line">        <tt class="py-name">c</tt><tt class="py-op">=</tt><tt class="py-name">conn</tt><tt class="py-op">.</tt><tt class="py-name">cursor</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L504"></a><tt class="py-lineno">504</tt>  <tt class="py-line">        <tt class="py-name">c2</tt><tt class="py-op">=</tt><tt class="py-name">conn</tt><tt class="py-op">.</tt><tt class="py-name">cursor</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L505"></a><tt class="py-lineno">505</tt>  <tt class="py-line">        <tt class="py-name">c3</tt><tt class="py-op">=</tt><tt class="py-name">conn</tt><tt class="py-op">.</tt><tt class="py-name">cursor</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L506"></a><tt class="py-lineno">506</tt>  <tt class="py-line">        <tt class="py-name">c4</tt><tt class="py-op">=</tt><tt class="py-name">conn</tt><tt class="py-op">.</tt><tt class="py-name">cursor</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L507"></a><tt class="py-lineno">507</tt>  <tt class="py-line"> </tt>
<a name="L508"></a><tt class="py-lineno">508</tt>  <tt class="py-line">        <tt class="py-comment"># Dropeo la tabla destino y la creo de nuevo, igual a la origen, vac&#237;a</tt> </tt>
<a name="L509"></a><tt class="py-lineno">509</tt>  <tt class="py-line">        <tt class="py-keyword">try</tt><tt class="py-op">:</tt> </tt>
<a name="L510"></a><tt class="py-lineno">510</tt>  <tt class="py-line">                <tt class="py-name">c</tt><tt class="py-op">.</tt><tt class="py-name">execute</tt><tt class="py-op">(</tt><tt class="py-string">'drop table '</tt><tt class="py-op">+</tt><tt class="py-name">target_table</tt><tt class="py-op">)</tt> </tt>
<a name="L511"></a><tt class="py-lineno">511</tt>  <tt class="py-line">        <tt class="py-keyword">except</tt> <tt class="py-name">sqlite3</tt><tt class="py-op">.</tt><tt class="py-name">OperationalError</tt><tt class="py-op">:</tt> </tt>
<a name="L512"></a><tt class="py-lineno">512</tt>  <tt class="py-line">                <tt class="py-keyword">pass</tt> </tt>
<a name="L513"></a><tt class="py-lineno">513</tt>  <tt class="py-line"> </tt>
<a name="L514"></a><tt class="py-lineno">514</tt>  <tt class="py-line">        <tt class="py-comment"># Creo una tabla igual a la origen, vac&#237;a</tt> </tt>
<a name="L515"></a><tt class="py-lineno">515</tt>  <tt class="py-line">        <tt class="py-name">c</tt><tt class="py-op">.</tt><tt class="py-name">execute</tt><tt class="py-op">(</tt><tt class="py-string">'create table '</tt><tt class="py-op">+</tt><tt class="py-name">target_table</tt><tt class="py-op">+</tt><tt class="py-string">' as select * from '</tt><tt class="py-op">+</tt><tt class="py-name">source_table</tt><tt class="py-op">+</tt><tt class="py-string">' where 0=1'</tt><tt class="py-op">)</tt> </tt>
<a name="L516"></a><tt class="py-lineno">516</tt>  <tt class="py-line">        <tt class="py-name">conn</tt><tt class="py-op">.</tt><tt class="py-name">commit</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L517"></a><tt class="py-lineno">517</tt>  <tt class="py-line"> </tt>
<a name="L518"></a><tt class="py-lineno">518</tt>  <tt class="py-line">        <tt class="py-comment"># Agrego el diferenciador de instancia, la columna para la hc, y la columna para el scope</tt> </tt>
<a name="L519"></a><tt class="py-lineno">519</tt>  <tt class="py-line">        <tt class="py-name">c</tt><tt class="py-op">.</tt><tt class="py-name">execute</tt><tt class="py-op">(</tt><tt class="py-string">'alter table '</tt><tt class="py-op">+</tt><tt class="py-name">target_table</tt><tt class="py-op">+</tt><tt class="py-string">' add column instance_number integer'</tt><tt class="py-op">)</tt><tt class="py-op">;</tt> </tt>
<a name="L520"></a><tt class="py-lineno">520</tt>  <tt class="py-line">        <tt class="py-name">c</tt><tt class="py-op">.</tt><tt class="py-name">execute</tt><tt class="py-op">(</tt><tt class="py-string">'alter table '</tt><tt class="py-op">+</tt><tt class="py-name">target_table</tt><tt class="py-op">+</tt><tt class="py-string">' add column hc_token string'</tt><tt class="py-op">)</tt><tt class="py-op">;</tt> </tt>
<a name="L521"></a><tt class="py-lineno">521</tt>  <tt class="py-line">        <tt class="py-name">c</tt><tt class="py-op">.</tt><tt class="py-name">execute</tt><tt class="py-op">(</tt><tt class="py-string">'alter table '</tt><tt class="py-op">+</tt><tt class="py-name">target_table</tt><tt class="py-op">+</tt><tt class="py-string">' add column xcope string'</tt><tt class="py-op">)</tt><tt class="py-op">;</tt> </tt>
<a name="L522"></a><tt class="py-lineno">522</tt>  <tt class="py-line"> </tt>
<a name="L523"></a><tt class="py-lineno">523</tt>  <tt class="py-line">        <tt class="py-name">conn</tt><tt class="py-op">.</tt><tt class="py-name">commit</tt><tt class="py-op">;</tt> </tt>
<a name="L524"></a><tt class="py-lineno">524</tt>  <tt class="py-line"> </tt>
<a name="L525"></a><tt class="py-lineno">525</tt>  <tt class="py-line">        <tt class="py-comment"># Recorro oraci&#243;n por oraci&#243;n y voy insertando</tt> </tt>
<a name="L526"></a><tt class="py-lineno">526</tt>  <tt class="py-line">        <tt class="py-comment"># Antes armo la lista de columnas y signos de interrogaci&#243;n</tt> </tt>
<a name="L527"></a><tt class="py-lineno">527</tt>  <tt class="py-line">        <tt class="py-name">c</tt><tt class="py-op">.</tt><tt class="py-name">execute</tt><tt class="py-op">(</tt><tt class="py-string">'select * from '</tt><tt class="py-op">+</tt><tt class="py-name">source_table</tt><tt class="py-op">+</tt> <tt class="py-string">' limit 1'</tt><tt class="py-op">)</tt> </tt>
<a name="L528"></a><tt class="py-lineno">528</tt>  <tt class="py-line">        <tt class="py-name">columnas</tt><tt class="py-op">=</tt><tt class="py-string">''</tt> </tt>
<a name="L529"></a><tt class="py-lineno">529</tt>  <tt class="py-line">        <tt class="py-name">interrogantes</tt><tt class="py-op">=</tt><tt class="py-string">''</tt> </tt>
<a name="L530"></a><tt class="py-lineno">530</tt>  <tt class="py-line">        <tt class="py-name">row</tt><tt class="py-op">=</tt><tt class="py-name">c</tt><tt class="py-op">.</tt><tt class="py-name">fetchone</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L531"></a><tt class="py-lineno">531</tt>  <tt class="py-line">        <tt class="py-name">cant_columnas</tt><tt class="py-op">=</tt><tt class="py-name">len</tt><tt class="py-op">(</tt><tt class="py-name">row</tt><tt class="py-op">.</tt><tt class="py-name">keys</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">)</tt><tt class="py-op">+</tt><tt class="py-number">3</tt> </tt>
<a name="L532"></a><tt class="py-lineno">532</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-name">k</tt> <tt class="py-keyword">in</tt> <tt class="py-name">row</tt><tt class="py-op">.</tt><tt class="py-name">keys</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L533"></a><tt class="py-lineno">533</tt>  <tt class="py-line">                        <tt class="py-name">columnas</tt><tt class="py-op">+=</tt><tt class="py-string">','</tt><tt class="py-op">+</tt><tt class="py-name">k</tt> </tt>
<a name="L534"></a><tt class="py-lineno">534</tt>  <tt class="py-line">                        <tt class="py-name">interrogantes</tt><tt class="py-op">+=</tt><tt class="py-string">',?'</tt> </tt>
<a name="L535"></a><tt class="py-lineno">535</tt>  <tt class="py-line">        <tt class="py-name">columnas</tt><tt class="py-op">+=</tt><tt class="py-string">',instance_number,hc_token,xcope'</tt> </tt>
<a name="L536"></a><tt class="py-lineno">536</tt>  <tt class="py-line">        <tt class="py-name">columnas</tt><tt class="py-op">=</tt><tt class="py-name">columnas</tt><tt class="py-op">.</tt><tt class="py-name">lstrip</tt><tt class="py-op">(</tt><tt class="py-string">','</tt><tt class="py-op">)</tt> </tt>
<a name="L537"></a><tt class="py-lineno">537</tt>  <tt class="py-line">        <tt class="py-name">interrogantes</tt><tt class="py-op">=</tt><tt class="py-name">interrogantes</tt><tt class="py-op">.</tt><tt class="py-name">lstrip</tt><tt class="py-op">(</tt><tt class="py-string">','</tt><tt class="py-op">)</tt> </tt>
<a name="L538"></a><tt class="py-lineno">538</tt>  <tt class="py-line">        <tt class="py-name">interrogantes</tt><tt class="py-op">+=</tt><tt class="py-string">',?,?,?'</tt> </tt>
<a name="L539"></a><tt class="py-lineno">539</tt>  <tt class="py-line">         </tt>
<a name="L540"></a><tt class="py-lineno">540</tt>  <tt class="py-line">         </tt>
<a name="L541"></a><tt class="py-lineno">541</tt>  <tt class="py-line">        <tt class="py-comment"># Ahora s&#237; recorro e inserto</tt> </tt>
<a name="L542"></a><tt class="py-lineno">542</tt>  <tt class="py-line">        <tt class="py-comment">#c.execute('select document_id,sentence_id from '+ source_table+' where sentence_id=\'S18.9\' group by document_id,sentence_id')</tt> </tt>
<a name="L543"></a><tt class="py-lineno">543</tt>  <tt class="py-line">        <tt class="py-name">c</tt><tt class="py-op">.</tt><tt class="py-name">execute</tt><tt class="py-op">(</tt><tt class="py-string">'select document_id,sentence_id from '</tt><tt class="py-op">+</tt> <tt class="py-name">source_table</tt><tt class="py-op">+</tt><tt class="py-string">' group by document_id,sentence_id'</tt><tt class="py-op">)</tt> </tt>
<a name="L544"></a><tt class="py-lineno">544</tt>  <tt class="py-line"> </tt>
<a name="L545"></a><tt class="py-lineno">545</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-name">sentence</tt> <tt class="py-keyword">in</tt> <tt class="py-name">c</tt><tt class="py-op">:</tt> </tt>
<a name="L546"></a><tt class="py-lineno">546</tt>  <tt class="py-line">                <tt class="py-comment">#print 'Oracion:',sentence['sentence_id']</tt> </tt>
<a name="L547"></a><tt class="py-lineno">547</tt>  <tt class="py-line">                 </tt>
<a name="L548"></a><tt class="py-lineno">548</tt>  <tt class="py-line">                <tt class="py-comment"># Recorro la oraci&#243;n y obtengo los datos de cada hedge_cue diferente que encuentro</tt> </tt>
<a name="L549"></a><tt class="py-lineno">549</tt>  <tt class="py-line">                <tt class="py-name">cant_hc</tt><tt class="py-op">=</tt><tt class="py-number">0</tt> </tt>
<a name="L550"></a><tt class="py-lineno">550</tt>  <tt class="py-line">                <tt class="py-name">cues</tt><tt class="py-op">=</tt><tt class="py-op">[</tt><tt class="py-op">]</tt>          </tt>
<a name="L551"></a><tt class="py-lineno">551</tt>  <tt class="py-line">                <tt class="py-name">hedge_token</tt><tt class="py-op">=</tt><tt class="py-name">None</tt> </tt>
<a name="L552"></a><tt class="py-lineno">552</tt>  <tt class="py-line">                <tt class="py-name">hedge_nested_level</tt><tt class="py-op">=</tt><tt class="py-name">None</tt> </tt>
<a name="L553"></a><tt class="py-lineno">553</tt>  <tt class="py-line">                <tt class="py-name">hedge_pos</tt><tt class="py-op">=</tt><tt class="py-name">None</tt> </tt>
<a name="L554"></a><tt class="py-lineno">554</tt>  <tt class="py-line">                <tt class="py-name">c2</tt><tt class="py-op">.</tt><tt class="py-name">execute</tt><tt class="py-op">(</tt><tt class="py-string">'select * from '</tt><tt class="py-op">+</tt> <tt class="py-name">source_table</tt><tt class="py-op">+</tt><tt class="py-string">' where document_id=? and sentence_id=?'</tt><tt class="py-op">,</tt> <tt class="py-op">(</tt><tt class="py-name">sentence</tt><tt class="py-op">[</tt><tt class="py-string">'document_id'</tt><tt class="py-op">]</tt><tt class="py-op">,</tt><tt class="py-name">sentence</tt><tt class="py-op">[</tt><tt class="py-string">'sentence_id'</tt><tt class="py-op">]</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L555"></a><tt class="py-lineno">555</tt>  <tt class="py-line">                <tt class="py-keyword">for</tt> <tt class="py-name">row</tt> <tt class="py-keyword">in</tt> <tt class="py-name">c2</tt><tt class="py-op">:</tt> </tt>
<a name="L556"></a><tt class="py-lineno">556</tt>  <tt class="py-line">                 </tt>
<a name="L557"></a><tt class="py-lineno">557</tt>  <tt class="py-line">                        <tt class="py-keyword">if</tt> <tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-string">'hedge_cue'</tt><tt class="py-op">]</tt><tt class="py-op">==</tt><tt class="py-string">'B-SPECCUE'</tt><tt class="py-op">:</tt> </tt>
<a name="L558"></a><tt class="py-lineno">558</tt>  <tt class="py-line">                                <tt class="py-comment"># Encontr&#233; una marca de especulaci&#243;n</tt> </tt>
<a name="L559"></a><tt class="py-lineno">559</tt>  <tt class="py-line">                                <tt class="py-name">cant_hc</tt><tt class="py-op">=</tt><tt class="py-name">cant_hc</tt><tt class="py-op">+</tt><tt class="py-number">1</tt> </tt>
<a name="L560"></a><tt class="py-lineno">560</tt>  <tt class="py-line">                                <tt class="py-name">hedge_token</tt><tt class="py-op">=</tt><tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-string">'word'</tt><tt class="py-op">]</tt> </tt>
<a name="L561"></a><tt class="py-lineno">561</tt>  <tt class="py-line">                                <tt class="py-name">hedge_pos</tt><tt class="py-op">=</tt><tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-string">'token_num'</tt><tt class="py-op">]</tt> </tt>
<a name="L562"></a><tt class="py-lineno">562</tt>  <tt class="py-line">                                <tt class="py-keyword">if</tt> <tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-string">'hedge_cue1'</tt><tt class="py-op">]</tt><tt class="py-op">==</tt><tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-string">'hedge_cue'</tt><tt class="py-op">]</tt><tt class="py-op">:</tt> </tt>
<a name="L563"></a><tt class="py-lineno">563</tt>  <tt class="py-line">                                        <tt class="py-name">hedge_nested_level</tt><tt class="py-op">=</tt><tt class="py-number">1</tt> </tt>
<a name="L564"></a><tt class="py-lineno">564</tt>  <tt class="py-line">                                <tt class="py-keyword">elif</tt> <tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-string">'hedge_cue2'</tt><tt class="py-op">]</tt><tt class="py-op">==</tt><tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-string">'hedge_cue'</tt><tt class="py-op">]</tt><tt class="py-op">:</tt> </tt>
<a name="L565"></a><tt class="py-lineno">565</tt>  <tt class="py-line">                                        <tt class="py-name">hedge_nested_level</tt><tt class="py-op">=</tt><tt class="py-number">2</tt> </tt>
<a name="L566"></a><tt class="py-lineno">566</tt>  <tt class="py-line">                                <tt class="py-keyword">elif</tt> <tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-string">'hedge_cue3'</tt><tt class="py-op">]</tt><tt class="py-op">==</tt><tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-string">'hedge_cue'</tt><tt class="py-op">]</tt><tt class="py-op">:</tt> </tt>
<a name="L567"></a><tt class="py-lineno">567</tt>  <tt class="py-line">                                                <tt class="py-name">hedge_nested_level</tt><tt class="py-op">=</tt><tt class="py-number">3</tt> </tt>
<a name="L568"></a><tt class="py-lineno">568</tt>  <tt class="py-line">                        <tt class="py-keyword">elif</tt> <tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-string">'hedge_cue'</tt><tt class="py-op">]</tt><tt class="py-op">==</tt><tt class="py-string">'I-SPECCUE'</tt><tt class="py-op">:</tt> </tt>
<a name="L569"></a><tt class="py-lineno">569</tt>  <tt class="py-line">                                <tt class="py-comment"># Sigue la &#250;ltima marca de especulaci&#243;n</tt> </tt>
<a name="L570"></a><tt class="py-lineno">570</tt>  <tt class="py-line">                                <tt class="py-name">hedge_token</tt><tt class="py-op">=</tt><tt class="py-name">hedge_token</tt><tt class="py-op">+</tt><tt class="py-string">'_'</tt><tt class="py-op">+</tt><tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-string">'word'</tt><tt class="py-op">]</tt> </tt>
<a name="L571"></a><tt class="py-lineno">571</tt>  <tt class="py-line">                        <tt class="py-keyword">else</tt><tt class="py-op">:</tt> </tt>
<a name="L572"></a><tt class="py-lineno">572</tt>  <tt class="py-line">                                        <tt class="py-comment"># Encontr&#233; una O, tengo que ver si estaba generando un token</tt> </tt>
<a name="L573"></a><tt class="py-lineno">573</tt>  <tt class="py-line">                                        <tt class="py-keyword">if</tt> <tt class="py-name">hedge_token</tt><tt class="py-op">:</tt> </tt>
<a name="L574"></a><tt class="py-lineno">574</tt>  <tt class="py-line">                                                <tt class="py-name">cues</tt><tt class="py-op">.</tt><tt class="py-name">append</tt><tt class="py-op">(</tt><tt class="py-op">(</tt><tt class="py-name">cant_hc</tt><tt class="py-op">,</tt><tt class="py-name">hedge_token</tt><tt class="py-op">,</tt><tt class="py-name">hedge_pos</tt><tt class="py-op">,</tt><tt class="py-name">hedge_nested_level</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L575"></a><tt class="py-lineno">575</tt>  <tt class="py-line">                                                <tt class="py-name">hedge_token</tt><tt class="py-op">=</tt><tt class="py-name">None</tt> </tt>
<a name="L576"></a><tt class="py-lineno">576</tt>  <tt class="py-line">                                                <tt class="py-name">hedge_nested_level</tt><tt class="py-op">=</tt><tt class="py-name">None</tt> </tt>
<a name="L577"></a><tt class="py-lineno">577</tt>  <tt class="py-line">                 </tt>
<a name="L578"></a><tt class="py-lineno">578</tt>  <tt class="py-line">                <tt class="py-comment"># Recorro las marcas e identifico el comienzo y el fin del scope de cada una</tt> </tt>
<a name="L579"></a><tt class="py-lineno">579</tt>  <tt class="py-line">                <tt class="py-comment">#print "Cantidad de marcas:",cant_hc</tt> </tt>
<a name="L580"></a><tt class="py-lineno">580</tt>  <tt class="py-line">                <tt class="py-keyword">for</tt> <tt class="py-op">(</tt><tt class="py-name">instance_number</tt><tt class="py-op">,</tt><tt class="py-name">hedge_token</tt><tt class="py-op">,</tt><tt class="py-name">hedge_pos</tt><tt class="py-op">,</tt><tt class="py-name">hedge_nested_level</tt><tt class="py-op">)</tt> <tt class="py-keyword">in</tt> <tt class="py-name">cues</tt><tt class="py-op">:</tt> </tt>
<a name="L581"></a><tt class="py-lineno">581</tt>  <tt class="py-line">                        <tt class="py-keyword">print</tt> <tt class="py-string">"Posicion "</tt><tt class="py-op">,</tt><tt class="py-name">instance_number</tt><tt class="py-op">,</tt><tt class="py-string">":"</tt><tt class="py-op">,</tt><tt class="py-name">hedge_pos</tt><tt class="py-op">,</tt><tt class="py-name">hedge_token</tt><tt class="py-op">,</tt><tt class="py-string">" Anidamiento:"</tt><tt class="py-op">,</tt><tt class="py-name">hedge_nested_level</tt> </tt>
<a name="L582"></a><tt class="py-lineno">582</tt>  <tt class="py-line">                        <tt class="py-comment"># Recorro la oraci&#243;n de nuevo, buscando cada scope</tt> </tt>
<a name="L583"></a><tt class="py-lineno">583</tt>  <tt class="py-line">                        <tt class="py-name">c2</tt><tt class="py-op">.</tt><tt class="py-name">execute</tt><tt class="py-op">(</tt><tt class="py-string">'select * from '</tt><tt class="py-op">+</tt> <tt class="py-name">source_table</tt><tt class="py-op">+</tt><tt class="py-string">' where document_id=? and sentence_id=?'</tt><tt class="py-op">,</tt> <tt class="py-op">(</tt><tt class="py-name">sentence</tt><tt class="py-op">[</tt><tt class="py-string">'document_id'</tt><tt class="py-op">]</tt><tt class="py-op">,</tt><tt class="py-name">sentence</tt><tt class="py-op">[</tt><tt class="py-string">'sentence_id'</tt><tt class="py-op">]</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L584"></a><tt class="py-lineno">584</tt>  <tt class="py-line">                        <tt class="py-name">first</tt><tt class="py-op">=</tt><tt class="py-name">None</tt> </tt>
<a name="L585"></a><tt class="py-lineno">585</tt>  <tt class="py-line">                        <tt class="py-name">last</tt><tt class="py-op">=</tt><tt class="py-name">None</tt> </tt>
<a name="L586"></a><tt class="py-lineno">586</tt>  <tt class="py-line">                        <tt class="py-name">hedge_xcope_column</tt><tt class="py-op">=</tt><tt class="py-string">'hedge_xcope'</tt><tt class="py-op">+</tt><tt class="py-name">str</tt><tt class="py-op">(</tt><tt class="py-name">hedge_nested_level</tt><tt class="py-op">)</tt> </tt>
<a name="L587"></a><tt class="py-lineno">587</tt>  <tt class="py-line">                        <tt class="py-keyword">for</tt> <tt class="py-name">row</tt> <tt class="py-keyword">in</tt> <tt class="py-name">c2</tt><tt class="py-op">:</tt> </tt>
<a name="L588"></a><tt class="py-lineno">588</tt>  <tt class="py-line">                                        <tt class="py-keyword">if</tt> <tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-name">hedge_xcope_column</tt><tt class="py-op">]</tt><tt class="py-op">==</tt><tt class="py-string">'B-SPECXCOPE'</tt><tt class="py-op">:</tt> </tt>
<a name="L589"></a><tt class="py-lineno">589</tt>  <tt class="py-line">                                                        <tt class="py-keyword">if</tt> <tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-string">'token_num'</tt><tt class="py-op">]</tt><tt class="py-op">&lt;=</tt><tt class="py-name">hedge_pos</tt><tt class="py-op">:</tt> </tt>
<a name="L590"></a><tt class="py-lineno">590</tt>  <tt class="py-line">                                                                        <tt class="py-name">first</tt><tt class="py-op">=</tt><tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-string">'token_num'</tt><tt class="py-op">]</tt> </tt>
<a name="L591"></a><tt class="py-lineno">591</tt>  <tt class="py-line">                                        <tt class="py-keyword">elif</tt> <tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-name">hedge_xcope_column</tt><tt class="py-op">]</tt><tt class="py-op">==</tt><tt class="py-string">'I-SPECXCOPE'</tt><tt class="py-op">:</tt> </tt>
<a name="L592"></a><tt class="py-lineno">592</tt>  <tt class="py-line">                                                        <tt class="py-keyword">pass</tt> </tt>
<a name="L593"></a><tt class="py-lineno">593</tt>  <tt class="py-line">                                        <tt class="py-keyword">else</tt><tt class="py-op">:</tt> </tt>
<a name="L594"></a><tt class="py-lineno">594</tt>  <tt class="py-line">                                                        <tt class="py-keyword">if</tt> <tt class="py-name">first</tt> <tt class="py-keyword">and</tt> <tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-string">'token_num'</tt><tt class="py-op">]</tt><tt class="py-op">&gt;=</tt><tt class="py-name">hedge_pos</tt><tt class="py-op">:</tt> </tt>
<a name="L595"></a><tt class="py-lineno">595</tt>  <tt class="py-line">                                                                        <tt class="py-name">last</tt><tt class="py-op">=</tt><tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-string">'token_num'</tt><tt class="py-op">]</tt><tt class="py-op">-</tt><tt class="py-number">1</tt> </tt>
<a name="L596"></a><tt class="py-lineno">596</tt>  <tt class="py-line">                                                                        <tt class="py-keyword">break</tt> </tt>
<a name="L597"></a><tt class="py-lineno">597</tt>  <tt class="py-line">                                                        <tt class="py-keyword">else</tt><tt class="py-op">:</tt> </tt>
<a name="L598"></a><tt class="py-lineno">598</tt>  <tt class="py-line">                                                                        <tt class="py-name">first</tt><tt class="py-op">=</tt><tt class="py-name">None</tt> </tt>
<a name="L599"></a><tt class="py-lineno">599</tt>  <tt class="py-line">                        <tt class="py-keyword">print</tt> <tt class="py-string">"Comienzo de scope: "</tt><tt class="py-op">,</tt><tt class="py-name">first</tt><tt class="py-op">,</tt><tt class="py-string">" Fin de scope "</tt><tt class="py-op">,</tt><tt class="py-name">last</tt> </tt>
<a name="L600"></a><tt class="py-lineno">600</tt>  <tt class="py-line">                         </tt>
<a name="L601"></a><tt class="py-lineno">601</tt>  <tt class="py-line">                        <tt class="py-comment"># Una vez identificado el scope, recorro todos los tokens y voy insertando en la tabla destino</tt> </tt>
<a name="L602"></a><tt class="py-lineno">602</tt>  <tt class="py-line">                        <tt class="py-comment"># Lo hago una vez por cada cue que tenga</tt> </tt>
<a name="L603"></a><tt class="py-lineno">603</tt>  <tt class="py-line">                        <tt class="py-name">c2</tt><tt class="py-op">.</tt><tt class="py-name">execute</tt><tt class="py-op">(</tt><tt class="py-string">'select * from '</tt><tt class="py-op">+</tt> <tt class="py-name">source_table</tt><tt class="py-op">+</tt><tt class="py-string">' where document_id=? and sentence_id=?'</tt><tt class="py-op">,</tt> <tt class="py-op">(</tt><tt class="py-name">sentence</tt><tt class="py-op">[</tt><tt class="py-string">'document_id'</tt><tt class="py-op">]</tt><tt class="py-op">,</tt><tt class="py-name">sentence</tt><tt class="py-op">[</tt><tt class="py-string">'sentence_id'</tt><tt class="py-op">]</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L604"></a><tt class="py-lineno">604</tt>  <tt class="py-line">                        <tt class="py-keyword">for</tt> <tt class="py-name">row</tt> <tt class="py-keyword">in</tt> <tt class="py-name">c2</tt><tt class="py-op">:</tt> </tt>
<a name="L605"></a><tt class="py-lineno">605</tt>  <tt class="py-line">                         </tt>
<a name="L606"></a><tt class="py-lineno">606</tt>  <tt class="py-line">                                <tt class="py-comment"># Copio las columnas originales</tt> </tt>
<a name="L607"></a><tt class="py-lineno">607</tt>  <tt class="py-line">                                <tt class="py-name">valores</tt><tt class="py-op">=</tt><tt class="py-op">[</tt><tt class="py-op">]</tt> </tt>
<a name="L608"></a><tt class="py-lineno">608</tt>  <tt class="py-line">                                <tt class="py-keyword">for</tt> <tt class="py-name">k</tt> <tt class="py-keyword">in</tt> <tt class="py-name">row</tt><tt class="py-op">.</tt><tt class="py-name">keys</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt>     </tt>
<a name="L609"></a><tt class="py-lineno">609</tt>  <tt class="py-line">                                        <tt class="py-name">valores</tt><tt class="py-op">.</tt><tt class="py-name">append</tt><tt class="py-op">(</tt><tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-name">k</tt><tt class="py-op">]</tt><tt class="py-op">)</tt> </tt>
<a name="L610"></a><tt class="py-lineno">610</tt>  <tt class="py-line">                                 </tt>
<a name="L611"></a><tt class="py-lineno">611</tt>  <tt class="py-line">                                <tt class="py-comment"># Agrego la columna instance_number</tt> </tt>
<a name="L612"></a><tt class="py-lineno">612</tt>  <tt class="py-line">                                <tt class="py-name">valores</tt><tt class="py-op">.</tt><tt class="py-name">append</tt><tt class="py-op">(</tt><tt class="py-name">instance_number</tt><tt class="py-op">)</tt> </tt>
<a name="L613"></a><tt class="py-lineno">613</tt>  <tt class="py-line">                                 </tt>
<a name="L614"></a><tt class="py-lineno">614</tt>  <tt class="py-line">                                <tt class="py-comment"># Agrego la columna hc_token</tt> </tt>
<a name="L615"></a><tt class="py-lineno">615</tt>  <tt class="py-line">                                <tt class="py-name">valores</tt><tt class="py-op">.</tt><tt class="py-name">append</tt><tt class="py-op">(</tt><tt class="py-name">hedge_token</tt><tt class="py-op">)</tt> </tt>
<a name="L616"></a><tt class="py-lineno">616</tt>  <tt class="py-line">                                 </tt>
<a name="L617"></a><tt class="py-lineno">617</tt>  <tt class="py-line">                                <tt class="py-comment">#Determino el valor del scope, en formato FOL, y lo agrego</tt> </tt>
<a name="L618"></a><tt class="py-lineno">618</tt>  <tt class="py-line">                                <tt class="py-keyword">if</tt> <tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-string">'token_num'</tt><tt class="py-op">]</tt><tt class="py-op">==</tt><tt class="py-name">first</tt><tt class="py-op">:</tt> </tt>
<a name="L619"></a><tt class="py-lineno">619</tt>  <tt class="py-line">                                        <tt class="py-name">xcope</tt><tt class="py-op">=</tt><tt class="py-string">'F'</tt> </tt>
<a name="L620"></a><tt class="py-lineno">620</tt>  <tt class="py-line">                                <tt class="py-keyword">elif</tt> <tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-string">'token_num'</tt><tt class="py-op">]</tt><tt class="py-op">==</tt><tt class="py-name">last</tt><tt class="py-op">:</tt> </tt>
<a name="L621"></a><tt class="py-lineno">621</tt>  <tt class="py-line">                                        <tt class="py-name">xcope</tt><tt class="py-op">=</tt><tt class="py-string">'L'</tt>        </tt>
<a name="L622"></a><tt class="py-lineno">622</tt>  <tt class="py-line">                                <tt class="py-keyword">else</tt><tt class="py-op">:</tt> </tt>
<a name="L623"></a><tt class="py-lineno">623</tt>  <tt class="py-line">                                        <tt class="py-name">xcope</tt><tt class="py-op">=</tt><tt class="py-string">'O'</tt> </tt>
<a name="L624"></a><tt class="py-lineno">624</tt>  <tt class="py-line">                                         </tt>
<a name="L625"></a><tt class="py-lineno">625</tt>  <tt class="py-line">                                <tt class="py-name">valores</tt><tt class="py-op">.</tt><tt class="py-name">append</tt><tt class="py-op">(</tt><tt class="py-name">xcope</tt><tt class="py-op">)</tt> </tt>
<a name="L626"></a><tt class="py-lineno">626</tt>  <tt class="py-line">         </tt>
<a name="L627"></a><tt class="py-lineno">627</tt>  <tt class="py-line">                                <tt class="py-name">c3</tt><tt class="py-op">.</tt><tt class="py-name">execute</tt><tt class="py-op">(</tt><tt class="py-string">'insert into '</tt><tt class="py-op">+</tt> <tt class="py-name">target_table</tt><tt class="py-op">+</tt><tt class="py-string">' ('</tt><tt class="py-op">+</tt> <tt class="py-name">columnas</tt><tt class="py-op">+</tt><tt class="py-string">') values ('</tt><tt class="py-op">+</tt> <tt class="py-name">interrogantes</tt><tt class="py-op">+</tt><tt class="py-string">')'</tt><tt class="py-op">,</tt><tt class="py-name">valores</tt><tt class="py-op">)</tt> </tt>
<a name="L628"></a><tt class="py-lineno">628</tt>  <tt class="py-line">                        <tt class="py-name">conn</tt><tt class="py-op">.</tt><tt class="py-name">commit</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
</div><a name="L629"></a><tt class="py-lineno">629</tt>  <tt class="py-line"> </tt>
<a name="add_guessed_hedge_cue"></a><div id="add_guessed_hedge_cue-def"><a name="L630"></a><tt class="py-lineno">630</tt> <a class="py-toggle" href="#" id="add_guessed_hedge_cue-toggle" onclick="return toggle('add_guessed_hedge_cue');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="pln_inco.bioscope.scripts-module.html#add_guessed_hedge_cue">add_guessed_hedge_cue</a><tt class="py-op">(</tt><tt class="py-param">dbname</tt><tt class="py-op">,</tt><tt class="py-param">tablename</tt><tt class="py-op">,</tt><tt class="py-param">test_filename</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="add_guessed_hedge_cue-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="add_guessed_hedge_cue-expanded"><a name="L631"></a><tt class="py-lineno">631</tt>  <tt class="py-line">        <tt class="py-docstring">"""</tt> </tt>
<a name="L632"></a><tt class="py-lineno">632</tt>  <tt class="py-line"><tt class="py-docstring">        Agrega la columna guessed_hedge_cue para incorporar el resultado de una evaluaci&#243;n de hedge_cue</tt> </tt>
<a name="L633"></a><tt class="py-lineno">633</tt>  <tt class="py-line"><tt class="py-docstring">        que ser&#225; utilizado como atributo para aprender el scope</tt> </tt>
<a name="L634"></a><tt class="py-lineno">634</tt>  <tt class="py-line"><tt class="py-docstring">        @arg dbname: nombre del archivo que tiene la base de datos</tt> </tt>
<a name="L635"></a><tt class="py-lineno">635</tt>  <tt class="py-line"><tt class="py-docstring">        @type dbname:C{string}</tt> </tt>
<a name="L636"></a><tt class="py-lineno">636</tt>  <tt class="py-line"><tt class="py-docstring">        @arg tablename: nombre de la tabla a la que se le agrega la columna</tt> </tt>
<a name="L637"></a><tt class="py-lineno">637</tt>  <tt class="py-line"><tt class="py-docstring">        @type tablename:C{string}</tt> </tt>
<a name="L638"></a><tt class="py-lineno">638</tt>  <tt class="py-line"><tt class="py-docstring">        @arg test_filename: archivo de testeo de donde obtengo el valor de la hedge cue aprendido</tt> </tt>
<a name="L639"></a><tt class="py-lineno">639</tt>  <tt class="py-line"><tt class="py-docstring">        @type test_filename:C{string}</tt> </tt>
<a name="L640"></a><tt class="py-lineno">640</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L641"></a><tt class="py-lineno">641</tt>  <tt class="py-line">         </tt>
<a name="L642"></a><tt class="py-lineno">642</tt>  <tt class="py-line">          </tt>
<a name="L643"></a><tt class="py-lineno">643</tt>  <tt class="py-line">        <tt class="py-comment"># Inicializo la conexi&#243;n a la base de datos</tt> </tt>
<a name="L644"></a><tt class="py-lineno">644</tt>  <tt class="py-line">        <tt class="py-name">conn</tt><tt class="py-op">=</tt> <tt class="py-name">sqlite3</tt><tt class="py-op">.</tt><tt class="py-name">connect</tt><tt class="py-op">(</tt><tt class="py-name">dbname</tt><tt class="py-op">)</tt>    </tt>
<a name="L645"></a><tt class="py-lineno">645</tt>  <tt class="py-line">        <tt class="py-name">conn</tt><tt class="py-op">.</tt><tt class="py-name">text_factory</tt> <tt class="py-op">=</tt> <tt class="py-name">str</tt> </tt>
<a name="L646"></a><tt class="py-lineno">646</tt>  <tt class="py-line">        <tt class="py-name">conn</tt><tt class="py-op">.</tt><tt class="py-name">row_factory</tt><tt class="py-op">=</tt><tt class="py-name">sqlite3</tt><tt class="py-op">.</tt><tt class="py-name">Row</tt> </tt>
<a name="L647"></a><tt class="py-lineno">647</tt>  <tt class="py-line">        <tt class="py-name">c</tt><tt class="py-op">=</tt><tt class="py-name">conn</tt><tt class="py-op">.</tt><tt class="py-name">cursor</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L648"></a><tt class="py-lineno">648</tt>  <tt class="py-line"> </tt>
<a name="L649"></a><tt class="py-lineno">649</tt>  <tt class="py-line"> </tt>
<a name="L650"></a><tt class="py-lineno">650</tt>  <tt class="py-line">        <tt class="py-comment"># Agrego la columna guessed_hedge_cue y la inicializo en O</tt> </tt>
<a name="L651"></a><tt class="py-lineno">651</tt>  <tt class="py-line">        <tt class="py-keyword">try</tt><tt class="py-op">:</tt>     </tt>
<a name="L652"></a><tt class="py-lineno">652</tt>  <tt class="py-line">                <tt class="py-name">c</tt><tt class="py-op">.</tt><tt class="py-name">execute</tt><tt class="py-op">(</tt><tt class="py-string">'alter table '</tt><tt class="py-op">+</tt><tt class="py-name">tablename</tt><tt class="py-op">+</tt><tt class="py-string">' add column guessed_hedge_cue'</tt><tt class="py-op">)</tt> </tt>
<a name="L653"></a><tt class="py-lineno">653</tt>  <tt class="py-line">        <tt class="py-keyword">except</tt> <tt class="py-name">sqlite3</tt><tt class="py-op">.</tt><tt class="py-name">OperationalError</tt><tt class="py-op">:</tt> </tt>
<a name="L654"></a><tt class="py-lineno">654</tt>  <tt class="py-line">                        <tt class="py-keyword">pass</tt>     </tt>
<a name="L655"></a><tt class="py-lineno">655</tt>  <tt class="py-line">         </tt>
<a name="L656"></a><tt class="py-lineno">656</tt>  <tt class="py-line">         </tt>
<a name="L657"></a><tt class="py-lineno">657</tt>  <tt class="py-line">        <tt class="py-name">c</tt><tt class="py-op">.</tt><tt class="py-name">execute</tt><tt class="py-op">(</tt><tt class="py-string">'update '</tt><tt class="py-op">+</tt> <tt class="py-name">tablename</tt><tt class="py-op">+</tt> <tt class="py-string">' set guessed_hedge_cue=\'O\''</tt><tt class="py-op">)</tt> </tt>
<a name="L658"></a><tt class="py-lineno">658</tt>  <tt class="py-line">        <tt class="py-name">conn</tt><tt class="py-op">.</tt><tt class="py-name">commit</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L659"></a><tt class="py-lineno">659</tt>  <tt class="py-line">         </tt>
<a name="L660"></a><tt class="py-lineno">660</tt>  <tt class="py-line">        <tt class="py-comment"># Abro el archivo y lo voy recorriendo</tt> </tt>
<a name="L661"></a><tt class="py-lineno">661</tt>  <tt class="py-line">        <tt class="py-comment"># Asumo que en las primeras tres columnas de cada l&#237;nea est&#225;n el documento, la oraci&#243;n y el token_num</tt> </tt>
<a name="L662"></a><tt class="py-lineno">662</tt>  <tt class="py-line">        <tt class="py-name">f</tt><tt class="py-op">=</tt><tt class="py-name">open</tt><tt class="py-op">(</tt><tt class="py-name">test_filename</tt><tt class="py-op">,</tt><tt class="py-string">'r'</tt><tt class="py-op">)</tt> </tt>
<a name="L663"></a><tt class="py-lineno">663</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-name">line</tt> <tt class="py-keyword">in</tt> <tt class="py-name">f</tt><tt class="py-op">:</tt> </tt>
<a name="L664"></a><tt class="py-lineno">664</tt>  <tt class="py-line">                <tt class="py-keyword">if</tt> <tt class="py-name">line</tt> <tt class="py-op">!=</tt><tt class="py-string">'\n'</tt><tt class="py-op">:</tt> </tt>
<a name="L665"></a><tt class="py-lineno">665</tt>  <tt class="py-line">                        <tt class="py-name">tokens</tt><tt class="py-op">=</tt><tt class="py-name">line</tt><tt class="py-op">.</tt><tt class="py-name">rstrip</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">.</tt><tt class="py-name">split</tt><tt class="py-op">(</tt><tt class="py-string">'\t'</tt><tt class="py-op">)</tt>                         </tt>
<a name="L666"></a><tt class="py-lineno">666</tt>  <tt class="py-line">                        <tt class="py-name">document_id</tt><tt class="py-op">=</tt><tt class="py-name">tokens</tt><tt class="py-op">[</tt><tt class="py-number">0</tt><tt class="py-op">]</tt> </tt>
<a name="L667"></a><tt class="py-lineno">667</tt>  <tt class="py-line">                        <tt class="py-name">sentence_id</tt><tt class="py-op">=</tt><tt class="py-name">tokens</tt><tt class="py-op">[</tt><tt class="py-number">1</tt><tt class="py-op">]</tt> </tt>
<a name="L668"></a><tt class="py-lineno">668</tt>  <tt class="py-line">                        <tt class="py-name">token_num</tt><tt class="py-op">=</tt><tt class="py-name">tokens</tt><tt class="py-op">[</tt><tt class="py-number">2</tt><tt class="py-op">]</tt>                                       </tt>
<a name="L669"></a><tt class="py-lineno">669</tt>  <tt class="py-line">                        <tt class="py-keyword">if</tt> <tt class="py-name">tokens</tt><tt class="py-op">[</tt><tt class="py-op">-</tt><tt class="py-number">1</tt><tt class="py-op">]</tt> <tt class="py-op">!=</tt> <tt class="py-string">'O'</tt><tt class="py-op">:</tt> </tt>
<a name="L670"></a><tt class="py-lineno">670</tt>  <tt class="py-line">                                <tt class="py-keyword">print</tt> <tt class="py-string">"Actualizo "</tt><tt class="py-op">,</tt> <tt class="py-name">document_id</tt><tt class="py-op">,</tt> <tt class="py-name">sentence_id</tt><tt class="py-op">,</tt> <tt class="py-name">token_num</tt><tt class="py-op">,</tt> <tt class="py-name">tokens</tt><tt class="py-op">[</tt><tt class="py-op">-</tt><tt class="py-number">1</tt><tt class="py-op">]</tt> </tt>
<a name="L671"></a><tt class="py-lineno">671</tt>  <tt class="py-line">                                <tt class="py-name">c</tt><tt class="py-op">.</tt><tt class="py-name">execute</tt><tt class="py-op">(</tt><tt class="py-string">'update '</tt><tt class="py-op">+</tt> <tt class="py-name">tablename</tt><tt class="py-op">+</tt><tt class="py-string">' set guessed_hedge_cue=? where document_id=? and sentence_id=? and token_num=?'</tt><tt class="py-op">,</tt><tt class="py-op">(</tt><tt class="py-name">tokens</tt><tt class="py-op">[</tt><tt class="py-op">-</tt><tt class="py-number">1</tt><tt class="py-op">]</tt><tt class="py-op">,</tt><tt class="py-name">document_id</tt><tt class="py-op">,</tt><tt class="py-name">sentence_id</tt><tt class="py-op">,</tt><tt class="py-name">token_num</tt><tt class="py-op">)</tt><tt class="py-op">)</tt> </tt>
<a name="L672"></a><tt class="py-lineno">672</tt>  <tt class="py-line">        <tt class="py-name">conn</tt><tt class="py-op">.</tt><tt class="py-name">commit</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L673"></a><tt class="py-lineno">673</tt>  <tt class="py-line">        <tt class="py-name">f</tt><tt class="py-op">.</tt><tt class="py-name">close</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
</div><a name="L674"></a><tt class="py-lineno">674</tt>  <tt class="py-line">                 </tt>
<a name="L675"></a><tt class="py-lineno">675</tt>  <tt class="py-line">                         </tt>
<a name="L676"></a><tt class="py-lineno">676</tt>  <tt class="py-line"> </tt>
<a name="gen_conll_file"></a><div id="gen_conll_file-def"><a name="L677"></a><tt class="py-lineno">677</tt> <a class="py-toggle" href="#" id="gen_conll_file-toggle" onclick="return toggle('gen_conll_file');">-</a><tt class="py-line"><tt class="py-keyword">def</tt> <a class="py-def-name" href="pln_inco.bioscope.scripts-module.html#gen_conll_file">gen_conll_file</a><tt class="py-op">(</tt><tt class="py-param">dbname</tt><tt class="py-op">,</tt><tt class="py-param">tablename</tt><tt class="py-op">,</tt><tt class="py-param">filename</tt><tt class="py-op">,</tt><tt class="py-param">xs</tt><tt class="py-op">,</tt><tt class="py-param">y</tt><tt class="py-op">,</tt> <tt class="py-param">has_instances</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
</div><div id="gen_conll_file-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="gen_conll_file-expanded"><a name="L678"></a><tt class="py-lineno">678</tt>  <tt class="py-line">        <tt class="py-docstring">""" </tt> </tt>
<a name="L679"></a><tt class="py-lineno">679</tt>  <tt class="py-line"><tt class="py-docstring">        Genera el archivo para el entrenamiento/evaluaci&#243;n con CRF++, a partir de la tabla de bioscope que se le indique</tt> </tt>
<a name="L680"></a><tt class="py-lineno">680</tt>  <tt class="py-line"><tt class="py-docstring">        Este archivo est&#225; en formato CoNLL, tiene una l&#237;nea por token, los atributos est&#225;n</tt> </tt>
<a name="L681"></a><tt class="py-lineno">681</tt>  <tt class="py-line"><tt class="py-docstring">        separados por espacio, y el &#250;ltimo es el que vamos a usar para clasificar. Las oraciones est&#225;n separadas por l&#237;neas</tt> </tt>
<a name="L682"></a><tt class="py-lineno">682</tt>  <tt class="py-line"><tt class="py-docstring">        en blanco</tt> </tt>
<a name="L683"></a><tt class="py-lineno">683</tt>  <tt class="py-line"><tt class="py-docstring">        @arg dbname: nombre del archivo que tiene la base de datos</tt> </tt>
<a name="L684"></a><tt class="py-lineno">684</tt>  <tt class="py-line"><tt class="py-docstring">        @type dbname:C{string}</tt> </tt>
<a name="L685"></a><tt class="py-lineno">685</tt>  <tt class="py-line"><tt class="py-docstring">        @arg tablename: nombre de la tabla a partir de la cual generar el archivo</tt> </tt>
<a name="L686"></a><tt class="py-lineno">686</tt>  <tt class="py-line"><tt class="py-docstring">        @type tablename:C{string}</tt> </tt>
<a name="L687"></a><tt class="py-lineno">687</tt>  <tt class="py-line"><tt class="py-docstring">        @arg xs: lista de atributos a generar. Tienen que ser iguales a las columnas de la tabla de atributos de bioscope. No incluyen la clase a aprender.</tt> </tt>
<a name="L688"></a><tt class="py-lineno">688</tt>  <tt class="py-line"><tt class="py-docstring">        @type xs: List</tt> </tt>
<a name="L689"></a><tt class="py-lineno">689</tt>  <tt class="py-line"><tt class="py-docstring">        @arg y: Clase a aprender (es uno de los atributos)</tt> </tt>
<a name="L690"></a><tt class="py-lineno">690</tt>  <tt class="py-line"><tt class="py-docstring">        @type y:List</tt> </tt>
<a name="L691"></a><tt class="py-lineno">691</tt>  <tt class="py-line"><tt class="py-docstring">        @arg has_instances: indica si la tabla tiene varias instancias de la oraci&#243;n</tt> </tt>
<a name="L692"></a><tt class="py-lineno">692</tt>  <tt class="py-line"><tt class="py-docstring">        @type has_instances: Bool</tt> </tt>
<a name="L693"></a><tt class="py-lineno">693</tt>  <tt class="py-line"><tt class="py-docstring">        """</tt> </tt>
<a name="L694"></a><tt class="py-lineno">694</tt>  <tt class="py-line"> </tt>
<a name="L695"></a><tt class="py-lineno">695</tt>  <tt class="py-line">        <tt class="py-name">content</tt><tt class="py-op">=</tt><tt class="py-string">''</tt>       </tt>
<a name="L696"></a><tt class="py-lineno">696</tt>  <tt class="py-line">        <tt class="py-name">t0</tt><tt class="py-op">=</tt><tt class="py-name">time</tt><tt class="py-op">.</tt><tt class="py-name">clock</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L697"></a><tt class="py-lineno">697</tt>  <tt class="py-line">        <tt class="py-name">f</tt><tt class="py-op">=</tt><tt class="py-name">open</tt><tt class="py-op">(</tt><tt class="py-name">filename</tt><tt class="py-op">,</tt><tt class="py-string">'w+'</tt><tt class="py-op">)</tt> </tt>
<a name="L698"></a><tt class="py-lineno">698</tt>  <tt class="py-line">        <tt class="py-name">conn</tt><tt class="py-op">=</tt> <tt class="py-name">sqlite3</tt><tt class="py-op">.</tt><tt class="py-name">connect</tt><tt class="py-op">(</tt><tt class="py-name">dbname</tt><tt class="py-op">)</tt>    </tt>
<a name="L699"></a><tt class="py-lineno">699</tt>  <tt class="py-line">        <tt class="py-name">conn</tt><tt class="py-op">.</tt><tt class="py-name">text_factory</tt> <tt class="py-op">=</tt> <tt class="py-name">str</tt> </tt>
<a name="L700"></a><tt class="py-lineno">700</tt>  <tt class="py-line">        <tt class="py-name">conn</tt><tt class="py-op">.</tt><tt class="py-name">row_factory</tt><tt class="py-op">=</tt><tt class="py-name">sqlite3</tt><tt class="py-op">.</tt><tt class="py-name">Row</tt> </tt>
<a name="L701"></a><tt class="py-lineno">701</tt>  <tt class="py-line">        <tt class="py-name">c</tt><tt class="py-op">=</tt><tt class="py-name">conn</tt><tt class="py-op">.</tt><tt class="py-name">cursor</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L702"></a><tt class="py-lineno">702</tt>  <tt class="py-line">         </tt>
<a name="L703"></a><tt class="py-lineno">703</tt>  <tt class="py-line">        <tt class="py-comment"># Armo la lista separada por comas de los atributos</tt> </tt>
<a name="L704"></a><tt class="py-lineno">704</tt>  <tt class="py-line">        <tt class="py-comment"># Por supuesto deben llamarse igual que las columnas de la tabla</tt> </tt>
<a name="L705"></a><tt class="py-lineno">705</tt>  <tt class="py-line">        <tt class="py-name">x2s</tt><tt class="py-op">=</tt><tt class="py-op">[</tt><tt class="py-op">]</tt> </tt>
<a name="L706"></a><tt class="py-lineno">706</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-name">x</tt> <tt class="py-keyword">in</tt> <tt class="py-name">xs</tt><tt class="py-op">:</tt> </tt>
<a name="L707"></a><tt class="py-lineno">707</tt>  <tt class="py-line">                <tt class="py-name">x2s</tt><tt class="py-op">.</tt><tt class="py-name">append</tt><tt class="py-op">(</tt><tt class="py-name">x</tt><tt class="py-op">)</tt> </tt>
<a name="L708"></a><tt class="py-lineno">708</tt>  <tt class="py-line">        <tt class="py-name">cabezal_select</tt><tt class="py-op">=</tt><tt class="py-string">','</tt><tt class="py-op">.</tt><tt class="py-name">join</tt><tt class="py-op">(</tt><tt class="py-name">xs</tt><tt class="py-op">)</tt> </tt>
<a name="L709"></a><tt class="py-lineno">709</tt>  <tt class="py-line">        <tt class="py-name">cabezal_select</tt><tt class="py-op">=</tt><tt class="py-name">cabezal_select</tt><tt class="py-op">+</tt><tt class="py-string">','</tt><tt class="py-op">+</tt><tt class="py-name">y</tt><tt class="py-op">+</tt><tt class="py-string">' '</tt> </tt>
<a name="L710"></a><tt class="py-lineno">710</tt>  <tt class="py-line"> </tt>
<a name="L711"></a><tt class="py-lineno">711</tt>  <tt class="py-line">        <tt class="py-keyword">if</tt> <tt class="py-name">has_instances</tt><tt class="py-op">:</tt> </tt>
<a name="L712"></a><tt class="py-lineno">712</tt>  <tt class="py-line">                <tt class="py-name">c</tt><tt class="py-op">.</tt><tt class="py-name">execute</tt><tt class="py-op">(</tt><tt class="py-string">'select document_id,sentence_id,token_num,instance_number,'</tt><tt class="py-op">+</tt><tt class="py-name">cabezal_select</tt><tt class="py-op">+</tt><tt class="py-string">' from '</tt><tt class="py-op">+</tt><tt class="py-name">tablename</tt><tt class="py-op">+</tt><tt class="py-string">'  order by document_id,sentence_id,instance_number,token_num'</tt><tt class="py-op">)</tt> </tt>
<a name="L713"></a><tt class="py-lineno">713</tt>  <tt class="py-line">        <tt class="py-keyword">else</tt><tt class="py-op">:</tt> </tt>
<a name="L714"></a><tt class="py-lineno">714</tt>  <tt class="py-line">                <tt class="py-name">c</tt><tt class="py-op">.</tt><tt class="py-name">execute</tt><tt class="py-op">(</tt><tt class="py-string">'select document_id,sentence_id,token_num '</tt><tt class="py-op">+</tt><tt class="py-name">cabezal_select</tt><tt class="py-op">+</tt><tt class="py-string">' from '</tt><tt class="py-op">+</tt><tt class="py-name">tablename</tt><tt class="py-op">+</tt><tt class="py-string">'  order by document_id,sentence_id,token_num'</tt><tt class="py-op">)</tt> </tt>
<a name="L715"></a><tt class="py-lineno">715</tt>  <tt class="py-line">        <tt class="py-name">prev_sentence_id</tt><tt class="py-op">=</tt><tt class="py-string">'-1'</tt>    </tt>
<a name="L716"></a><tt class="py-lineno">716</tt>  <tt class="py-line">        <tt class="py-name">prev_instance</tt><tt class="py-op">=</tt><tt class="py-op">-</tt><tt class="py-number">1</tt> </tt>
<a name="L717"></a><tt class="py-lineno">717</tt>  <tt class="py-line">        <tt class="py-keyword">for</tt> <tt class="py-name">row</tt> <tt class="py-keyword">in</tt> <tt class="py-name">c</tt><tt class="py-op">:</tt> </tt>
<a name="L718"></a><tt class="py-lineno">718</tt>  <tt class="py-line">                                <tt class="py-keyword">if</tt> <tt class="py-op">(</tt><tt class="py-name">prev_sentence_id</tt> <tt class="py-op">!=</tt> <tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-string">'sentence_id'</tt><tt class="py-op">]</tt><tt class="py-op">)</tt> <tt class="py-keyword">or</tt> <tt class="py-op">(</tt><tt class="py-name">has_instances</tt> <tt class="py-keyword">and</tt> <tt class="py-name">prev_instance</tt> <tt class="py-op">!=</tt> <tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-string">'instance_number'</tt><tt class="py-op">]</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L719"></a><tt class="py-lineno">719</tt>  <tt class="py-line">                                        <tt class="py-comment">#Fin de la oraci&#243;n, dejo un espacio en blanco, excepto en la primera</tt> </tt>
<a name="L720"></a><tt class="py-lineno">720</tt>  <tt class="py-line">                                        <tt class="py-keyword">if</tt> <tt class="py-name">prev_sentence_id</tt> <tt class="py-op">!=</tt> <tt class="py-string">'-1'</tt><tt class="py-op">:</tt>                                     </tt>
<a name="L721"></a><tt class="py-lineno">721</tt>  <tt class="py-line">                                                <tt class="py-name">content</tt><tt class="py-op">=</tt><tt class="py-name">content</tt><tt class="py-op">+</tt><tt class="py-string">'\n'</tt> </tt>
<a name="L722"></a><tt class="py-lineno">722</tt>  <tt class="py-line">                                        <tt class="py-name">prev_sentence_id</tt> <tt class="py-op">=</tt> <tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-string">'sentence_id'</tt><tt class="py-op">]</tt> </tt>
<a name="L723"></a><tt class="py-lineno">723</tt>  <tt class="py-line">                                        <tt class="py-keyword">if</tt> <tt class="py-name">has_instances</tt><tt class="py-op">:</tt>  </tt>
<a name="L724"></a><tt class="py-lineno">724</tt>  <tt class="py-line">                                                <tt class="py-name">prev_instance</tt><tt class="py-op">=</tt><tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-string">'instance_number'</tt><tt class="py-op">]</tt> </tt>
<a name="L725"></a><tt class="py-lineno">725</tt>  <tt class="py-line">                                <tt class="py-keyword">for</tt> <tt class="py-name">k</tt> <tt class="py-keyword">in</tt> <tt class="py-name">row</tt><tt class="py-op">.</tt><tt class="py-name">keys</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">:</tt> </tt>
<a name="L726"></a><tt class="py-lineno">726</tt>  <tt class="py-line">                                                <tt class="py-name">content</tt><tt class="py-op">=</tt><tt class="py-name">content</tt><tt class="py-op">+</tt><tt class="py-name">str</tt><tt class="py-op">(</tt><tt class="py-name">row</tt><tt class="py-op">[</tt><tt class="py-name">k</tt><tt class="py-op">]</tt><tt class="py-op">)</tt><tt class="py-op">+</tt><tt class="py-string">'\t'</tt> </tt>
<a name="L727"></a><tt class="py-lineno">727</tt>  <tt class="py-line">                                <tt class="py-comment">#Borro el &#250;ltimo tabulador</tt> </tt>
<a name="L728"></a><tt class="py-lineno">728</tt>  <tt class="py-line">                                <tt class="py-name">content</tt><tt class="py-op">=</tt><tt class="py-name">rstrip</tt><tt class="py-op">(</tt><tt class="py-name">content</tt><tt class="py-op">)</tt> </tt>
<a name="L729"></a><tt class="py-lineno">729</tt>  <tt class="py-line">                                <tt class="py-name">content</tt><tt class="py-op">=</tt><tt class="py-name">content</tt><tt class="py-op">+</tt><tt class="py-string">'\n'</tt>     </tt>
<a name="L730"></a><tt class="py-lineno">730</tt>  <tt class="py-line">                                <tt class="py-name">f</tt><tt class="py-op">.</tt><tt class="py-name">write</tt><tt class="py-op">(</tt><tt class="py-name">content</tt><tt class="py-op">)</tt> </tt>
<a name="L731"></a><tt class="py-lineno">731</tt>  <tt class="py-line">                                <tt class="py-name">content</tt><tt class="py-op">=</tt><tt class="py-string">''</tt> </tt>
<a name="L732"></a><tt class="py-lineno">732</tt>  <tt class="py-line">        <tt class="py-name">f</tt><tt class="py-op">.</tt><tt class="py-name">close</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L733"></a><tt class="py-lineno">733</tt>  <tt class="py-line">        <tt class="py-name">c</tt><tt class="py-op">.</tt><tt class="py-name">close</tt><tt class="py-op">(</tt><tt class="py-op">)</tt> </tt>
<a name="L734"></a><tt class="py-lineno">734</tt>  <tt class="py-line">        <tt class="py-keyword">print</tt> <tt class="py-string">'Tiempo del proceso:'</tt><tt class="py-op">,</tt> <tt class="py-name">time</tt><tt class="py-op">.</tt><tt class="py-name">clock</tt><tt class="py-op">(</tt><tt class="py-op">)</tt><tt class="py-op">-</tt><tt class="py-name">t0</tt> </tt>
</div><a name="L735"></a><tt class="py-lineno">735</tt>  <tt class="py-line"> </tt><script type="text/javascript">
<!--
expandto(location.href);
// -->
</script>
</pre>
<br />
<!-- ==================== NAVIGATION BAR ==================== -->
<table class="navbar" width="100%" border="0" cellpadding="0" cellspacing="0"
       bgcolor="#a0c0ff">
  <!-- One header cell per top-level page of the generated documentation. -->
  <tr valign="middle">
    <!-- Package home page -->
    <th>&nbsp;&nbsp;&nbsp;<a href="pln_inco-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>

    <!-- Module tree -->
    <th>&nbsp;&nbsp;&nbsp;<a href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>

    <!-- Identifier index -->
    <th>&nbsp;&nbsp;&nbsp;<a href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>

    <!-- Help page -->
    <th>&nbsp;&nbsp;&nbsp;<a href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>

    <!-- Empty full-width cell; absorbs the remaining horizontal space -->
    <th class="navbar" width="100%"></th>
  </tr>
</table>
<!-- Footer: generation stamp on the left, link to the Epydoc site on the right. -->
<table border="0" cellpadding="0" cellspacing="0" width="100%">
  <tr>
    <td align="left" class="footer">
    Generated by Epydoc 3.0.1 on Tue Apr 26 01:04:51 2011
    </td>
    <td align="right" class="footer">
      <a target="mainFrame" href="http://epydoc.sourceforge.net"
        >http://epydoc.sourceforge.net</a>
    </td>
  </tr>
</table>

<script type="text/javascript">
  <!--
  // Private objects are initially displayed (because if
  // javascript is turned off then we want them to be
  // visible); but by default, we want to hide them.  So hide
  // them unless we have a cookie that says to show them.
  //
  // NOTE(review): checkCookie() is not defined in this page;
  // presumably it is provided by epydoc.js, which is loaded in
  // the document head. Confirm before moving or removing this call.
  checkCookie();
  // -->
</script>
</body>
</html>
